code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42
  43 from ganeti import ssh
  44 from ganeti import utils
  45 from ganeti import errors
  46 from ganeti import hypervisor
  47 from ganeti import locking
  48 from ganeti import constants
  49 from ganeti import objects
  50 from ganeti import serializer
  51 from ganeti import ssconf
  52 from ganeti import uidpool
  53 from ganeti import compat
  54 from ganeti import masterd
  55 from ganeti import netutils
  56
  57 import ganeti.masterd.instance # pylint: disable-msg=W0611
  58
  59
  60 # Modifiable default values; need to define these here before the
  61 # actual LUs
  62
  63 def _EmptyList():
  64   """Returns an empty list.
  65
  66   """
  67   return []
  68
  69
  70 def _EmptyDict():
  71   """Returns an empty dict.
  72
  73   """
  74   return {}
  75
  76
  77 #: The without-default default value
  78 _NoDefault = object()
  79
  80
  81 #: The no-type (value to complex to check it in the type system)
  82 _NoType = object()
  83
  84
  85 # Some basic types
  86 def _TNotNone(val):
  87   """Checks if the given value is not None.
  88
  89   """
  90   return val is not None
  91
  92
  93 def _TNone(val):
  94   """Checks if the given value is None.
  95
  96   """
  97   return val is None
  98
  99
 100 def _TBool(val):
 101   """Checks if the given value is a boolean.
 102
 103   """
 104   return isinstance(val, bool)
 105
 106
 107 def _TInt(val):
 108   """Checks if the given value is an integer.
 109
 110   """
 111   return isinstance(val, int)
 112
 113
 114 def _TFloat(val):
 115   """Checks if the given value is a float.
 116
 117   """
 118   return isinstance(val, float)
 119
 120
 121 def _TString(val):
 122   """Checks if the given value is a string.
 123
 124   """
 125   return isinstance(val, basestring)
 126
 127
 128 def _TTrue(val):
 129   """Checks if a given value evaluates to a boolean True value.
 130
 131   """
 132   return bool(val)
 133
 134
 135 def _TElemOf(target_list):
 136   """Builds a function that checks if a given value is a member of a list.
 137
 138   """
 139   return lambda val: val in target_list
 140
 141
 142 # Container types
 143 def _TList(val):
 144   """Checks if the given value is a list.
 145
 146   """
 147   return isinstance(val, list)
 148
 149
 150 def _TDict(val):
 151   """Checks if the given value is a dictionary.
 152
 153   """
 154   return isinstance(val, dict)
 155
 156
 157 # Combinator types
 158 def _TAnd(*args):
 159   """Combine multiple functions using an AND operation.
 160
 161   """
 162   def fn(val):
 163     return compat.all(t(val) for t in args)
 164   return fn
 165
 166
 167 def _TOr(*args):
 168   """Combine multiple functions using an AND operation.
 169
 170   """
 171   def fn(val):
 172     return compat.any(t(val) for t in args)
 173   return fn
 174
 175
 176 # Type aliases
 177
 178 #: a non-empty string
 179 _TNonEmptyString = _TAnd(_TString, _TTrue)
 180
 181
 182 #: a maybe non-empty string
 183 _TMaybeString = _TOr(_TNonEmptyString, _TNone)
 184
 185
 186 #: a maybe boolean (bool or none)
 187 _TMaybeBool = _TOr(_TBool, _TNone)
 188
 189
 190 #: a positive integer
 191 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
 192
 193 #: a strictly positive integer
 194 _TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
 195
 196
 197 def _TListOf(my_type):
 198   """Checks if a given value is a list with all elements of the same type.
 199
 200   """
 201   return _TAnd(_TList,
 202                lambda lst: compat.all(my_type(v) for v in lst))
 203
 204
 205 def _TDictOf(key_type, val_type):
 206   """Checks a dict type for the type of its key/values.
 207
 208   """
 209   return _TAnd(_TDict,
 210                lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
 211                                 and compat.all(val_type(v)
 212                                                for v in my_dict.values())))
 213
 214
 215 # Common opcode attributes
 216
 217 #: output fields for a query operation
 218 _POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
 219
 220
 221 #: the shutdown timeout
 222 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
 223                      _TPositiveInt)
 224
 225 #: the force parameter
 226 _PForce = ("force", False, _TBool)
 227
 228 #: a required instance name (for single-instance LUs)
 229 _PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
 230
 231
 232 #: a required node name (for single-node LUs)
 233 _PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
 234
 235 #: the migration type (live/non-live)
 236 _PMigrationMode = ("mode", None, _TOr(_TNone,
 237                                       _TElemOf(constants.HT_MIGRATION_MODES)))
 238
 239 #: the obsolete 'live' mode (boolean)
 240 _PMigrationLive = ("live", None, _TMaybeBool)
 241
 242
 243 # End types
 244 class LogicalUnit(object):
 245   """Logical Unit base class.
 246
 247   Subclasses must follow these rules:
 248     - implement ExpandNames
 249     - implement CheckPrereq (except when tasklets are used)
 250     - implement Exec (except when tasklets are used)
 251     - implement BuildHooksEnv
 252     - redefine HPATH and HTYPE
 253     - optionally redefine their run requirements:
 254         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 255
 256   Note that all commands require root permissions.
 257
 258   @ivar dry_run_result: the value (if any) that will be returned to the caller
 259       in dry-run mode (signalled by opcode dry_run parameter)
 260   @cvar _OP_PARAMS: a list of opcode attributes, their defaults values
 261       they should get if not already defined, and types they must match
 262
 263   """
 264   HPATH = None
 265   HTYPE = None
 266   _OP_PARAMS = []
 267   REQ_BGL = True
 268
 269   def __init__(self, processor, op, context, rpc):
 270     """Constructor for LogicalUnit.
 271
 272     This needs to be overridden in derived classes in order to check op
 273     validity.
 274
 275     """
 276     self.proc = processor
 277     self.op = op
 278     self.cfg = context.cfg
 279     self.context = context
 280     self.rpc = rpc
 281     # Dicts used to declare locking needs to mcpu
 282     self.needed_locks = None
 283     self.acquired_locks = {}
 284     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 285     self.add_locks = {}
 286     self.remove_locks = {}
 287     # Used to force good behavior when calling helper functions
 288     self.recalculate_locks = {}
 289     self.__ssh = None
 290     # logging
 291     self.Log = processor.Log # pylint: disable-msg=C0103
 292     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 293     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 294     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 295     # support for dry-run
 296     self.dry_run_result = None
 297     # support for generic debug attribute
 298     if (not hasattr(self.op, "debug_level") or
 299         not isinstance(self.op.debug_level, int)):
 300       self.op.debug_level = 0
 301
 302     # Tasklets
 303     self.tasklets = None
 304
 305     # The new kind-of-type-system
 306     op_id = self.op.OP_ID
 307     for attr_name, aval, test in self._OP_PARAMS:
 308       if not hasattr(op, attr_name):
 309         if aval == _NoDefault:
 310           raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
 311                                      (op_id, attr_name), errors.ECODE_INVAL)
 312         else:
 313           if callable(aval):
 314             dval = aval()
 315           else:
 316             dval = aval
 317           setattr(self.op, attr_name, dval)
 318       attr_val = getattr(op, attr_name)
 319       if test == _NoType:
 320         # no tests here
 321         continue
 322       if not callable(test):
 323         raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
 324                                      " given type is not a proper type (%s)" %
 325                                      (op_id, attr_name, test))
 326       if not test(attr_val):
 327         logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
 328                       self.op.OP_ID, attr_name, type(attr_val), attr_val)
 329         raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
 330                                    (op_id, attr_name), errors.ECODE_INVAL)
 331
 332     self.CheckArguments()
 333
 334   def __GetSSH(self):
 335     """Returns the SshRunner object
 336
 337     """
 338     if not self.__ssh:
 339       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 340     return self.__ssh
 341
 342   ssh = property(fget=__GetSSH)
 343
 344   def CheckArguments(self):
 345     """Check syntactic validity for the opcode arguments.
 346
 347     This method is for doing a simple syntactic check and ensure
 348     validity of opcode parameters, without any cluster-related
 349     checks. While the same can be accomplished in ExpandNames and/or
 350     CheckPrereq, doing these separate is better because:
 351
 352       - ExpandNames is left as as purely a lock-related function
 353       - CheckPrereq is run after we have acquired locks (and possible
 354         waited for them)
 355
 356     The function is allowed to change the self.op attribute so that
 357     later methods can no longer worry about missing parameters.
 358
 359     """
 360     pass
 361
 362   def ExpandNames(self):
 363     """Expand names for this LU.
 364
 365     This method is called before starting to execute the opcode, and it should
 366     update all the parameters of the opcode to their canonical form (e.g. a
 367     short node name must be fully expanded after this method has successfully
 368     completed). This way locking, hooks, logging, ecc. can work correctly.
 369
 370     LUs which implement this method must also populate the self.needed_locks
 371     member, as a dict with lock levels as keys, and a list of needed lock names
 372     as values. Rules:
 373
 374       - use an empty dict if you don't need any lock
 375       - if you don't need any lock at a particular level omit that level
 376       - don't put anything for the BGL level
 377       - if you want all locks at a level use locking.ALL_SET as a value
 378
 379     If you need to share locks (rather than acquire them exclusively) at one
 380     level you can modify self.share_locks, setting a true value (usually 1) for
 381     that level. By default locks are not shared.
 382
 383     This function can also define a list of tasklets, which then will be
 384     executed in order instead of the usual LU-level CheckPrereq and Exec
 385     functions, if those are not defined by the LU.
 386
 387     Examples::
 388
 389       # Acquire all nodes and one instance
 390       self.needed_locks = {
 391         locking.LEVEL_NODE: locking.ALL_SET,
 392         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 393       }
 394       # Acquire just two nodes
 395       self.needed_locks = {
 396         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 397       }
 398       # Acquire no locks
 399       self.needed_locks = {} # No, you can't leave it to the default value None
 400
 401     """
 402     # The implementation of this method is mandatory only if the new LU is
 403     # concurrent, so that old LUs don't need to be changed all at the same
 404     # time.
 405     if self.REQ_BGL:
 406       self.needed_locks = {} # Exclusive LUs don't need locks.
 407     else:
 408       raise NotImplementedError
 409
 410   def DeclareLocks(self, level):
 411     """Declare LU locking needs for a level
 412
 413     While most LUs can just declare their locking needs at ExpandNames time,
 414     sometimes there's the need to calculate some locks after having acquired
 415     the ones before. This function is called just before acquiring locks at a
 416     particular level, but after acquiring the ones at lower levels, and permits
 417     such calculations. It can be used to modify self.needed_locks, and by
 418     default it does nothing.
 419
 420     This function is only called if you have something already set in
 421     self.needed_locks for the level.
 422
 423     @param level: Locking level which is going to be locked
 424     @type level: member of ganeti.locking.LEVELS
 425
 426     """
 427
 428   def CheckPrereq(self):
 429     """Check prerequisites for this LU.
 430
 431     This method should check that the prerequisites for the execution
 432     of this LU are fulfilled. It can do internode communication, but
 433     it should be idempotent - no cluster or system changes are
 434     allowed.
 435
 436     The method should raise errors.OpPrereqError in case something is
 437     not fulfilled. Its return value is ignored.
 438
 439     This method should also update all the parameters of the opcode to
 440     their canonical form if it hasn't been done by ExpandNames before.
 441
 442     """
 443     if self.tasklets is not None:
 444       for (idx, tl) in enumerate(self.tasklets):
 445         logging.debug("Checking prerequisites for tasklet %s/%s",
 446                       idx + 1, len(self.tasklets))
 447         tl.CheckPrereq()
 448     else:
 449       pass
 450
 451   def Exec(self, feedback_fn):
 452     """Execute the LU.
 453
 454     This method should implement the actual work. It should raise
 455     errors.OpExecError for failures that are somewhat dealt with in
 456     code, or expected.
 457
 458     """
 459     if self.tasklets is not None:
 460       for (idx, tl) in enumerate(self.tasklets):
 461         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 462         tl.Exec(feedback_fn)
 463     else:
 464       raise NotImplementedError
 465
 466   def BuildHooksEnv(self):
 467     """Build hooks environment for this LU.
 468
 469     This method should return a three-node tuple consisting of: a dict
 470     containing the environment that will be used for running the
 471     specific hook for this LU, a list of node names on which the hook
 472     should run before the execution, and a list of node names on which
 473     the hook should run after the execution.
 474
 475     The keys of the dict must not have 'GANETI_' prefixed as this will
 476     be handled in the hooks runner. Also note additional keys will be
 477     added by the hooks runner. If the LU doesn't define any
 478     environment, an empty dict (and not None) should be returned.
 479
 480     No nodes should be returned as an empty list (and not None).
 481
 482     Note that if the HPATH for a LU class is None, this function will
 483     not be called.
 484
 485     """
 486     raise NotImplementedError
 487
 488   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 489     """Notify the LU about the results of its hooks.
 490
 491     This method is called every time a hooks phase is executed, and notifies
 492     the Logical Unit about the hooks' result. The LU can then use it to alter
 493     its result based on the hooks.  By default the method does nothing and the
 494     previous result is passed back unchanged but any LU can define it if it
 495     wants to use the local cluster hook-scripts somehow.
 496
 497     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 498         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 499     @param hook_results: the results of the multi-node hooks rpc call
 500     @param feedback_fn: function used send feedback back to the caller
 501     @param lu_result: the previous Exec result this LU had, or None
 502         in the PRE phase
 503     @return: the new Exec result, based on the previous result
 504         and hook results
 505
 506     """
 507     # API must be kept, thus we ignore the unused argument and could
 508     # be a function warnings
 509     # pylint: disable-msg=W0613,R0201
 510     return lu_result
 511
 512   def _ExpandAndLockInstance(self):
 513     """Helper function to expand and lock an instance.
 514
 515     Many LUs that work on an instance take its name in self.op.instance_name
 516     and need to expand it and then declare the expanded name for locking. This
 517     function does it, and then updates self.op.instance_name to the expanded
 518     name. It also initializes needed_locks as a dict, if this hasn't been done
 519     before.
 520
 521     """
 522     if self.needed_locks is None:
 523       self.needed_locks = {}
 524     else:
 525       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 526         "_ExpandAndLockInstance called with instance-level locks set"
 527     self.op.instance_name = _ExpandInstanceName(self.cfg,
 528                                                 self.op.instance_name)
 529     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 530
 531   def _LockInstancesNodes(self, primary_only=False):
 532     """Helper function to declare instances' nodes for locking.
 533
 534     This function should be called after locking one or more instances to lock
 535     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 536     with all primary or secondary nodes for instances already locked and
 537     present in self.needed_locks[locking.LEVEL_INSTANCE].
 538
 539     It should be called from DeclareLocks, and for safety only works if
 540     self.recalculate_locks[locking.LEVEL_NODE] is set.
 541
 542     In the future it may grow parameters to just lock some instance's nodes, or
 543     to just lock primaries or secondary nodes, if needed.
 544
 545     If should be called in DeclareLocks in a way similar to::
 546
 547       if level == locking.LEVEL_NODE:
 548         self._LockInstancesNodes()
 549
 550     @type primary_only: boolean
 551     @param primary_only: only lock primary nodes of locked instances
 552
 553     """
 554     assert locking.LEVEL_NODE in self.recalculate_locks, \
 555       "_LockInstancesNodes helper function called with no nodes to recalculate"
 556
 557     # TODO: check if we're really been called with the instance locks held
 558
 559     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 560     # future we might want to have different behaviors depending on the value
 561     # of self.recalculate_locks[locking.LEVEL_NODE]
 562     wanted_nodes = []
 563     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 564       instance = self.context.cfg.GetInstanceInfo(instance_name)
 565       wanted_nodes.append(instance.primary_node)
 566       if not primary_only:
 567         wanted_nodes.extend(instance.secondary_nodes)
 568
 569     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 570       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 571     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 572       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 573
 574     del self.recalculate_locks[locking.LEVEL_NODE]
 575
 576
 577 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 578   """Simple LU which runs no hooks.
 579
 580   This LU is intended as a parent for other LogicalUnits which will
 581   run no hooks, in order to reduce duplicate code.
 582
 583   """
 584   HPATH = None
 585   HTYPE = None
 586
 587   def BuildHooksEnv(self):
 588     """Empty BuildHooksEnv for NoHooksLu.
 589
 590     This just raises an error.
 591
 592     """
 593     assert False, "BuildHooksEnv called for NoHooksLUs"
 594
 595
 596 class Tasklet:
 597   """Tasklet base class.
 598
 599   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 600   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 601   tasklets know nothing about locks.
 602
 603   Subclasses must follow these rules:
 604     - Implement CheckPrereq
 605     - Implement Exec
 606
 607   """
 608   def __init__(self, lu):
 609     self.lu = lu
 610
 611     # Shortcuts
 612     self.cfg = lu.cfg
 613     self.rpc = lu.rpc
 614
 615   def CheckPrereq(self):
 616     """Check prerequisites for this tasklets.
 617
 618     This method should check whether the prerequisites for the execution of
 619     this tasklet are fulfilled. It can do internode communication, but it
 620     should be idempotent - no cluster or system changes are allowed.
 621
 622     The method should raise errors.OpPrereqError in case something is not
 623     fulfilled. Its return value is ignored.
 624
 625     This method should also update all parameters to their canonical form if it
 626     hasn't been done before.
 627
 628     """
 629     pass
 630
 631   def Exec(self, feedback_fn):
 632     """Execute the tasklet.
 633
 634     This method should implement the actual work. It should raise
 635     errors.OpExecError for failures that are somewhat dealt with in code, or
 636     expected.
 637
 638     """
 639     raise NotImplementedError
 640
 641
 642 def _GetWantedNodes(lu, nodes):
 643   """Returns list of checked and expanded node names.
 644
 645   @type lu: L{LogicalUnit}
 646   @param lu: the logical unit on whose behalf we execute
 647   @type nodes: list
 648   @param nodes: list of node names or None for all nodes
 649   @rtype: list
 650   @return: the list of nodes, sorted
 651   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 652
 653   """
 654   if not nodes:
 655     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 656       " non-empty list of nodes whose name is to be expanded.")
 657
 658   wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
 659   return utils.NiceSort(wanted)
 660
 661
 662 def _GetWantedInstances(lu, instances):
 663   """Returns list of checked and expanded instance names.
 664
 665   @type lu: L{LogicalUnit}
 666   @param lu: the logical unit on whose behalf we execute
 667   @type instances: list
 668   @param instances: list of instance names or None for all instances
 669   @rtype: list
 670   @return: the list of instances, sorted
 671   @raise errors.OpPrereqError: if the instances parameter is wrong type
 672   @raise errors.OpPrereqError: if any of the passed instances is not found
 673
 674   """
 675   if instances:
 676     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 677   else:
 678     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 679   return wanted
 680
 681
 682 def _GetUpdatedParams(old_params, update_dict,
 683                       use_default=True, use_none=False):
 684   """Return the new version of a parameter dictionary.
 685
 686   @type old_params: dict
 687   @param old_params: old parameters
 688   @type update_dict: dict
 689   @param update_dict: dict containing new parameter values, or
 690       constants.VALUE_DEFAULT to reset the parameter to its default
 691       value
 692   @param use_default: boolean
 693   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 694       values as 'to be deleted' values
 695   @param use_none: boolean
 696   @type use_none: whether to recognise C{None} values as 'to be
 697       deleted' values
 698   @rtype: dict
 699   @return: the new parameter dictionary
 700
 701   """
 702   params_copy = copy.deepcopy(old_params)
 703   for key, val in update_dict.iteritems():
 704     if ((use_default and val == constants.VALUE_DEFAULT) or
 705         (use_none and val is None)):
 706       try:
 707         del params_copy[key]
 708       except KeyError:
 709         pass
 710     else:
 711       params_copy[key] = val
 712   return params_copy
 713
 714
 715 def _CheckOutputFields(static, dynamic, selected):
 716   """Checks whether all selected fields are valid.
 717
 718   @type static: L{utils.FieldSet}
 719   @param static: static fields set
 720   @type dynamic: L{utils.FieldSet}
 721   @param dynamic: dynamic fields set
 722
 723   """
 724   f = utils.FieldSet()
 725   f.Extend(static)
 726   f.Extend(dynamic)
 727
 728   delta = f.NonMatching(selected)
 729   if delta:
 730     raise errors.OpPrereqError("Unknown output fields selected: %s"
 731                                % ",".join(delta), errors.ECODE_INVAL)
 732
 733
 734 def _CheckGlobalHvParams(params):
 735   """Validates that given hypervisor params are not global ones.
 736
 737   This will ensure that instances don't get customised versions of
 738   global params.
 739
 740   """
 741   used_globals = constants.HVC_GLOBALS.intersection(params)
 742   if used_globals:
 743     msg = ("The following hypervisor parameters are global and cannot"
 744            " be customized at instance level, please modify them at"
 745            " cluster level: %s" % utils.CommaJoin(used_globals))
 746     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 747
 748
 749 def _CheckNodeOnline(lu, node):
 750   """Ensure that a given node is online.
 751
 752   @param lu: the LU on behalf of which we make the check
 753   @param node: the node to check
 754   @raise errors.OpPrereqError: if the node is offline
 755
 756   """
 757   if lu.cfg.GetNodeInfo(node).offline:
 758     raise errors.OpPrereqError("Can't use offline node %s" % node,
 759                                errors.ECODE_INVAL)
 760
 761
 762 def _CheckNodeNotDrained(lu, node):
 763   """Ensure that a given node is not drained.
 764
 765   @param lu: the LU on behalf of which we make the check
 766   @param node: the node to check
 767   @raise errors.OpPrereqError: if the node is drained
 768
 769   """
 770   if lu.cfg.GetNodeInfo(node).drained:
 771     raise errors.OpPrereqError("Can't use drained node %s" % node,
 772                                errors.ECODE_INVAL)
 773
 774
 775 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 776   """Ensure that a node supports a given OS.
 777
 778   @param lu: the LU on behalf of which we make the check
 779   @param node: the node to check
 780   @param os_name: the OS to query about
 781   @param force_variant: whether to ignore variant errors
 782   @raise errors.OpPrereqError: if the node is not supporting the OS
 783
 784   """
 785   result = lu.rpc.call_os_get(node, os_name)
 786   result.Raise("OS '%s' not in supported OS list for node %s" %
 787                (os_name, node),
 788                prereq=True, ecode=errors.ECODE_INVAL)
 789   if not force_variant:
 790     _CheckOSVariant(result.payload, os_name)
 791
 792
 793 def _RequireFileStorage():
 794   """Checks that file storage is enabled.
 795
 796   @raise errors.OpPrereqError: when file storage is disabled
 797
 798   """
 799   if not constants.ENABLE_FILE_STORAGE:
 800     raise errors.OpPrereqError("File storage disabled at configure time",
 801                                errors.ECODE_INVAL)
 802
 803
 804 def _CheckDiskTemplate(template):
 805   """Ensure a given disk template is valid.
 806
 807   """
 808   if template not in constants.DISK_TEMPLATES:
 809     msg = ("Invalid disk template name '%s', valid templates are: %s" %
 810            (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
 811     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 812   if template == constants.DT_FILE:
 813     _RequireFileStorage()
 814   return True
 815
 816
 817 def _CheckStorageType(storage_type):
 818   """Ensure a given storage type is valid.
 819
 820   """
 821   if storage_type not in constants.VALID_STORAGE_TYPES:
 822     raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
 823                                errors.ECODE_INVAL)
 824   if storage_type == constants.ST_FILE:
 825     _RequireFileStorage()
 826   return True
 827
 828
 829 def _GetClusterDomainSecret():
 830   """Reads the cluster domain secret.
 831
 832   """
 833   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 834                                strict=True)
 835
 836
 837 def _CheckInstanceDown(lu, instance, reason):
 838   """Ensure that an instance is not running."""
 839   if instance.admin_up:
 840     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 841                                (instance.name, reason), errors.ECODE_STATE)
 842
 843   pnode = instance.primary_node
 844   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 845   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 846               prereq=True, ecode=errors.ECODE_ENVIRON)
 847
 848   if instance.name in ins_l.payload:
 849     raise errors.OpPrereqError("Instance %s is running, %s" %
 850                                (instance.name, reason), errors.ECODE_STATE)
 851
 852
 853 def _ExpandItemName(fn, name, kind):
 854   """Expand an item name.
 855
 856   @param fn: the function to use for expansion
 857   @param name: requested item name
 858   @param kind: text description ('Node' or 'Instance')
 859   @return: the resolved (full) name
 860   @raise errors.OpPrereqError: if the item is not found
 861
 862   """
 863   full_name = fn(name)
 864   if full_name is None:
 865     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 866                                errors.ECODE_NOENT)
 867   return full_name
 868
 869
 870 def _ExpandNodeName(cfg, name):
 871   """Wrapper over L{_ExpandItemName} for nodes."""
 872   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 873
 874
 875 def _ExpandInstanceName(cfg, name):
 876   """Wrapper over L{_ExpandItemName} for instance."""
 877   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 878
 879
 880 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 881                           memory, vcpus, nics, disk_template, disks,
 882                           bep, hvp, hypervisor_name):
 883   """Builds instance related env variables for hooks
 884
 885   This builds the hook environment from individual variables.
 886
 887   @type name: string
 888   @param name: the name of the instance
 889   @type primary_node: string
 890   @param primary_node: the name of the instance's primary node
 891   @type secondary_nodes: list
 892   @param secondary_nodes: list of secondary nodes as strings
 893   @type os_type: string
 894   @param os_type: the name of the instance's OS
 895   @type status: boolean
 896   @param status: the should_run status of the instance
 897   @type memory: string
 898   @param memory: the memory size of the instance
 899   @type vcpus: string
 900   @param vcpus: the count of VCPUs the instance has
 901   @type nics: list
 902   @param nics: list of tuples (ip, mac, mode, link) representing
 903       the NICs the instance has
 904   @type disk_template: string
 905   @param disk_template: the disk template of the instance
 906   @type disks: list
 907   @param disks: the list of (size, mode) pairs
 908   @type bep: dict
 909   @param bep: the backend parameters for the instance
 910   @type hvp: dict
 911   @param hvp: the hypervisor parameters for the instance
 912   @type hypervisor_name: string
 913   @param hypervisor_name: the hypervisor for the instance
 914   @rtype: dict
 915   @return: the hook environment for this instance
 916
 917   """
 918   if status:
 919     str_status = "up"
 920   else:
 921     str_status = "down"
 922   env = {
 923     "OP_TARGET": name,
 924     "INSTANCE_NAME": name,
 925     "INSTANCE_PRIMARY": primary_node,
 926     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 927     "INSTANCE_OS_TYPE": os_type,
 928     "INSTANCE_STATUS": str_status,
 929     "INSTANCE_MEMORY": memory,
 930     "INSTANCE_VCPUS": vcpus,
 931     "INSTANCE_DISK_TEMPLATE": disk_template,
 932     "INSTANCE_HYPERVISOR": hypervisor_name,
 933   }
 934
 935   if nics:
 936     nic_count = len(nics)
 937     for idx, (ip, mac, mode, link) in enumerate(nics):
 938       if ip is None:
 939         ip = ""
 940       env["INSTANCE_NIC%d_IP" % idx] = ip
 941       env["INSTANCE_NIC%d_MAC" % idx] = mac
 942       env["INSTANCE_NIC%d_MODE" % idx] = mode
 943       env["INSTANCE_NIC%d_LINK" % idx] = link
 944       if mode == constants.NIC_MODE_BRIDGED:
 945         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 946   else:
 947     nic_count = 0
 948
 949   env["INSTANCE_NIC_COUNT"] = nic_count
 950
 951   if disks:
 952     disk_count = len(disks)
 953     for idx, (size, mode) in enumerate(disks):
 954       env["INSTANCE_DISK%d_SIZE" % idx] = size
 955       env["INSTANCE_DISK%d_MODE" % idx] = mode
 956   else:
 957     disk_count = 0
 958
 959   env["INSTANCE_DISK_COUNT"] = disk_count
 960
 961   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 962     for key, value in source.items():
 963       env["INSTANCE_%s_%s" % (kind, key)] = value
 964
 965   return env
 966
 967
 968 def _NICListToTuple(lu, nics):
 969   """Build a list of nic information tuples.
 970
 971   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 972   value in LUQueryInstanceData.
 973
 974   @type lu:  L{LogicalUnit}
 975   @param lu: the logical unit on whose behalf we execute
 976   @type nics: list of L{objects.NIC}
 977   @param nics: list of nics to convert to hooks tuples
 978
 979   """
 980   hooks_nics = []
 981   cluster = lu.cfg.GetClusterInfo()
 982   for nic in nics:
 983     ip = nic.ip
 984     mac = nic.mac
 985     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 986     mode = filled_params[constants.NIC_MODE]
 987     link = filled_params[constants.NIC_LINK]
 988     hooks_nics.append((ip, mac, mode, link))
 989   return hooks_nics
 990
 991
 992 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 993   """Builds instance related env variables for hooks from an object.
 994
 995   @type lu: L{LogicalUnit}
 996   @param lu: the logical unit on whose behalf we execute
 997   @type instance: L{objects.Instance}
 998   @param instance: the instance for which we should build the
 999       environment
1000   @type override: dict
1001   @param override: dictionary with key/values that will override
1002       our values
1003   @rtype: dict
1004   @return: the hook environment dictionary
1005
1006   """
1007   cluster = lu.cfg.GetClusterInfo()
1008   bep = cluster.FillBE(instance)
1009   hvp = cluster.FillHV(instance)
1010   args = {
1011     'name': instance.name,
1012     'primary_node': instance.primary_node,
1013     'secondary_nodes': instance.secondary_nodes,
1014     'os_type': instance.os,
1015     'status': instance.admin_up,
1016     'memory': bep[constants.BE_MEMORY],
1017     'vcpus': bep[constants.BE_VCPUS],
1018     'nics': _NICListToTuple(lu, instance.nics),
1019     'disk_template': instance.disk_template,
1020     'disks': [(disk.size, disk.mode) for disk in instance.disks],
1021     'bep': bep,
1022     'hvp': hvp,
1023     'hypervisor_name': instance.hypervisor,
1024   }
1025   if override:
1026     args.update(override)
1027   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1028
1029
1030 def _AdjustCandidatePool(lu, exceptions):
1031   """Adjust the candidate pool after node operations.
1032
1033   """
1034   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1035   if mod_list:
1036     lu.LogInfo("Promoted nodes to master candidate role: %s",
1037                utils.CommaJoin(node.name for node in mod_list))
1038     for name in mod_list:
1039       lu.context.ReaddNode(name)
1040   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1041   if mc_now > mc_max:
1042     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1043                (mc_now, mc_max))
1044
1045
1046 def _DecideSelfPromotion(lu, exceptions=None):
1047   """Decide whether I should promote myself as a master candidate.
1048
1049   """
1050   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1051   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1052   # the new node will increase mc_max with one, so:
1053   mc_should = min(mc_should + 1, cp_size)
1054   return mc_now < mc_should
1055
1056
1057 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1058   """Check that the brigdes needed by a list of nics exist.
1059
1060   """
1061   cluster = lu.cfg.GetClusterInfo()
1062   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1063   brlist = [params[constants.NIC_LINK] for params in paramslist
1064             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1065   if brlist:
1066     result = lu.rpc.call_bridges_exist(target_node, brlist)
1067     result.Raise("Error checking bridges on destination node '%s'" %
1068                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1069
1070
1071 def _CheckInstanceBridgesExist(lu, instance, node=None):
1072   """Check that the brigdes needed by an instance exist.
1073
1074   """
1075   if node is None:
1076     node = instance.primary_node
1077   _CheckNicsBridgesExist(lu, instance.nics, node)
1078
1079
1080 def _CheckOSVariant(os_obj, name):
1081   """Check whether an OS name conforms to the os variants specification.
1082
1083   @type os_obj: L{objects.OS}
1084   @param os_obj: OS object to check
1085   @type name: string
1086   @param name: OS name passed by the user, to check for validity
1087
1088   """
1089   if not os_obj.supported_variants:
1090     return
1091   try:
1092     variant = name.split("+", 1)[1]
1093   except IndexError:
1094     raise errors.OpPrereqError("OS name must include a variant",
1095                                errors.ECODE_INVAL)
1096
1097   if variant not in os_obj.supported_variants:
1098     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1099
1100
1101 def _GetNodeInstancesInner(cfg, fn):
1102   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1103
1104
1105 def _GetNodeInstances(cfg, node_name):
1106   """Returns a list of all primary and secondary instances on a node.
1107
1108   """
1109
1110   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1111
1112
1113 def _GetNodePrimaryInstances(cfg, node_name):
1114   """Returns primary instances on a node.
1115
1116   """
1117   return _GetNodeInstancesInner(cfg,
1118                                 lambda inst: node_name == inst.primary_node)
1119
1120
1121 def _GetNodeSecondaryInstances(cfg, node_name):
1122   """Returns secondary instances on a node.
1123
1124   """
1125   return _GetNodeInstancesInner(cfg,
1126                                 lambda inst: node_name in inst.secondary_nodes)
1127
1128
1129 def _GetStorageTypeArgs(cfg, storage_type):
1130   """Returns the arguments for a storage type.
1131
1132   """
1133   # Special case for file storage
1134   if storage_type == constants.ST_FILE:
1135     # storage.FileStorage wants a list of storage directories
1136     return [[cfg.GetFileStorageDir()]]
1137
1138   return []
1139
1140
1141 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1142   faulty = []
1143
1144   for dev in instance.disks:
1145     cfg.SetDiskID(dev, node_name)
1146
1147   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1148   result.Raise("Failed to get disk status from node %s" % node_name,
1149                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1150
1151   for idx, bdev_status in enumerate(result.payload):
1152     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1153       faulty.append(idx)
1154
1155   return faulty
1156
1157
1158 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1159   """Check the sanity of iallocator and node arguments and use the
1160   cluster-wide iallocator if appropriate.
1161
1162   Check that at most one of (iallocator, node) is specified. If none is
1163   specified, then the LU's opcode's iallocator slot is filled with the
1164   cluster-wide default iallocator.
1165
1166   @type iallocator_slot: string
1167   @param iallocator_slot: the name of the opcode iallocator slot
1168   @type node_slot: string
1169   @param node_slot: the name of the opcode target node slot
1170
1171   """
1172   node = getattr(lu.op, node_slot, None)
1173   iallocator = getattr(lu.op, iallocator_slot, None)
1174
1175   if node is not None and iallocator is not None:
1176     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1177                                errors.ECODE_INVAL)
1178   elif node is None and iallocator is None:
1179     default_iallocator = lu.cfg.GetDefaultIAllocator()
1180     if default_iallocator:
1181       setattr(lu.op, iallocator_slot, default_iallocator)
1182     else:
1183       raise errors.OpPrereqError("No iallocator or node given and no"
1184                                  " cluster-wide default iallocator found."
1185                                  " Please specify either an iallocator or a"
1186                                  " node, or set a cluster-wide default"
1187                                  " iallocator.")
1188
1189
1190 class LUPostInitCluster(LogicalUnit):
1191   """Logical unit for running hooks after cluster initialization.
1192
1193   """
1194   HPATH = "cluster-init"
1195   HTYPE = constants.HTYPE_CLUSTER
1196
1197   def BuildHooksEnv(self):
1198     """Build hooks env.
1199
1200     """
1201     env = {"OP_TARGET": self.cfg.GetClusterName()}
1202     mn = self.cfg.GetMasterNode()
1203     return env, [], [mn]
1204
1205   def Exec(self, feedback_fn):
1206     """Nothing to do.
1207
1208     """
1209     return True
1210
1211
1212 class LUDestroyCluster(LogicalUnit):
1213   """Logical unit for destroying the cluster.
1214
1215   """
1216   HPATH = "cluster-destroy"
1217   HTYPE = constants.HTYPE_CLUSTER
1218
1219   def BuildHooksEnv(self):
1220     """Build hooks env.
1221
1222     """
1223     env = {"OP_TARGET": self.cfg.GetClusterName()}
1224     return env, [], []
1225
1226   def CheckPrereq(self):
1227     """Check prerequisites.
1228
1229     This checks whether the cluster is empty.
1230
1231     Any errors are signaled by raising errors.OpPrereqError.
1232
1233     """
1234     master = self.cfg.GetMasterNode()
1235
1236     nodelist = self.cfg.GetNodeList()
1237     if len(nodelist) != 1 or nodelist[0] != master:
1238       raise errors.OpPrereqError("There are still %d node(s) in"
1239                                  " this cluster." % (len(nodelist) - 1),
1240                                  errors.ECODE_INVAL)
1241     instancelist = self.cfg.GetInstanceList()
1242     if instancelist:
1243       raise errors.OpPrereqError("There are still %d instance(s) in"
1244                                  " this cluster." % len(instancelist),
1245                                  errors.ECODE_INVAL)
1246
1247   def Exec(self, feedback_fn):
1248     """Destroys the cluster.
1249
1250     """
1251     master = self.cfg.GetMasterNode()
1252     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1253
1254     # Run post hooks on master node before it's removed
1255     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1256     try:
1257       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1258     except:
1259       # pylint: disable-msg=W0702
1260       self.LogWarning("Errors occurred running hooks on %s" % master)
1261
1262     result = self.rpc.call_node_stop_master(master, False)
1263     result.Raise("Could not disable the master role")
1264
1265     if modify_ssh_setup:
1266       priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1267       utils.CreateBackup(priv_key)
1268       utils.CreateBackup(pub_key)
1269
1270     return master
1271
1272
1273 def _VerifyCertificate(filename):
1274   """Verifies a certificate for LUVerifyCluster.
1275
1276   @type filename: string
1277   @param filename: Path to PEM file
1278
1279   """
1280   try:
1281     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1282                                            utils.ReadFile(filename))
1283   except Exception, err: # pylint: disable-msg=W0703
1284     return (LUVerifyCluster.ETYPE_ERROR,
1285             "Failed to load X509 certificate %s: %s" % (filename, err))
1286
1287   (errcode, msg) = \
1288     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1289                                 constants.SSL_CERT_EXPIRATION_ERROR)
1290
1291   if msg:
1292     fnamemsg = "While verifying %s: %s" % (filename, msg)
1293   else:
1294     fnamemsg = None
1295
1296   if errcode is None:
1297     return (None, fnamemsg)
1298   elif errcode == utils.CERT_WARNING:
1299     return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1300   elif errcode == utils.CERT_ERROR:
1301     return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1302
1303   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1304
1305
1306 class LUVerifyCluster(LogicalUnit):
1307   """Verifies the cluster status.
1308
1309   """
1310   HPATH = "cluster-verify"
1311   HTYPE = constants.HTYPE_CLUSTER
1312   _OP_PARAMS = [
1313     ("skip_checks", _EmptyList,
1314      _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1315     ("verbose", False, _TBool),
1316     ("error_codes", False, _TBool),
1317     ("debug_simulate_errors", False, _TBool),
1318     ]
1319   REQ_BGL = False
1320
1321   TCLUSTER = "cluster"
1322   TNODE = "node"
1323   TINSTANCE = "instance"
1324
1325   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1326   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1327   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1328   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1329   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1330   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1331   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1332   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1333   ENODEDRBD = (TNODE, "ENODEDRBD")
1334   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1335   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1336   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1337   ENODEHV = (TNODE, "ENODEHV")
1338   ENODELVM = (TNODE, "ENODELVM")
1339   ENODEN1 = (TNODE, "ENODEN1")
1340   ENODENET = (TNODE, "ENODENET")
1341   ENODEOS = (TNODE, "ENODEOS")
1342   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1343   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1344   ENODERPC = (TNODE, "ENODERPC")
1345   ENODESSH = (TNODE, "ENODESSH")
1346   ENODEVERSION = (TNODE, "ENODEVERSION")
1347   ENODESETUP = (TNODE, "ENODESETUP")
1348   ENODETIME = (TNODE, "ENODETIME")
1349
1350   ETYPE_FIELD = "code"
1351   ETYPE_ERROR = "ERROR"
1352   ETYPE_WARNING = "WARNING"
1353
1354   class NodeImage(object):
1355     """A class representing the logical and physical status of a node.
1356
1357     @type name: string
1358     @ivar name: the node name to which this object refers
1359     @ivar volumes: a structure as returned from
1360         L{ganeti.backend.GetVolumeList} (runtime)
1361     @ivar instances: a list of running instances (runtime)
1362     @ivar pinst: list of configured primary instances (config)
1363     @ivar sinst: list of configured secondary instances (config)
1364     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1365         of this node (config)
1366     @ivar mfree: free memory, as reported by hypervisor (runtime)
1367     @ivar dfree: free disk, as reported by the node (runtime)
1368     @ivar offline: the offline status (config)
1369     @type rpc_fail: boolean
1370     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1371         not whether the individual keys were correct) (runtime)
1372     @type lvm_fail: boolean
1373     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1374     @type hyp_fail: boolean
1375     @ivar hyp_fail: whether the RPC call didn't return the instance list
1376     @type ghost: boolean
1377     @ivar ghost: whether this is a known node or not (config)
1378     @type os_fail: boolean
1379     @ivar os_fail: whether the RPC call didn't return valid OS data
1380     @type oslist: list
1381     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1382
1383     """
1384     def __init__(self, offline=False, name=None):
1385       self.name = name
1386       self.volumes = {}
1387       self.instances = []
1388       self.pinst = []
1389       self.sinst = []
1390       self.sbp = {}
1391       self.mfree = 0
1392       self.dfree = 0
1393       self.offline = offline
1394       self.rpc_fail = False
1395       self.lvm_fail = False
1396       self.hyp_fail = False
1397       self.ghost = False
1398       self.os_fail = False
1399       self.oslist = {}
1400
1401   def ExpandNames(self):
1402     self.needed_locks = {
1403       locking.LEVEL_NODE: locking.ALL_SET,
1404       locking.LEVEL_INSTANCE: locking.ALL_SET,
1405     }
1406     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1407
1408   def _Error(self, ecode, item, msg, *args, **kwargs):
1409     """Format an error message.
1410
1411     Based on the opcode's error_codes parameter, either format a
1412     parseable error code, or a simpler error string.
1413
1414     This must be called only from Exec and functions called from Exec.
1415
1416     """
1417     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1418     itype, etxt = ecode
1419     # first complete the msg
1420     if args:
1421       msg = msg % args
1422     # then format the whole message
1423     if self.op.error_codes:
1424       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1425     else:
1426       if item:
1427         item = " " + item
1428       else:
1429         item = ""
1430       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1431     # and finally report it via the feedback_fn
1432     self._feedback_fn("  - %s" % msg)
1433
1434   def _ErrorIf(self, cond, *args, **kwargs):
1435     """Log an error message if the passed condition is True.
1436
1437     """
1438     cond = bool(cond) or self.op.debug_simulate_errors
1439     if cond:
1440       self._Error(*args, **kwargs)
1441     # do not mark the operation as failed for WARN cases only
1442     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1443       self.bad = self.bad or cond
1444
1445   def _VerifyNode(self, ninfo, nresult):
1446     """Perform some basic validation on data returned from a node.
1447
1448       - check the result data structure is well formed and has all the
1449         mandatory fields
1450       - check ganeti version
1451
1452     @type ninfo: L{objects.Node}
1453     @param ninfo: the node to check
1454     @param nresult: the results from the node
1455     @rtype: boolean
1456     @return: whether overall this call was successful (and we can expect
1457          reasonable values in the respose)
1458
1459     """
1460     node = ninfo.name
1461     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1462
1463     # main result, nresult should be a non-empty dict
1464     test = not nresult or not isinstance(nresult, dict)
1465     _ErrorIf(test, self.ENODERPC, node,
1466                   "unable to verify node: no data returned")
1467     if test:
1468       return False
1469
1470     # compares ganeti version
1471     local_version = constants.PROTOCOL_VERSION
1472     remote_version = nresult.get("version", None)
1473     test = not (remote_version and
1474                 isinstance(remote_version, (list, tuple)) and
1475                 len(remote_version) == 2)
1476     _ErrorIf(test, self.ENODERPC, node,
1477              "connection to node returned invalid data")
1478     if test:
1479       return False
1480
1481     test = local_version != remote_version[0]
1482     _ErrorIf(test, self.ENODEVERSION, node,
1483              "incompatible protocol versions: master %s,"
1484              " node %s", local_version, remote_version[0])
1485     if test:
1486       return False
1487
1488     # node seems compatible, we can actually try to look into its results
1489
1490     # full package version
1491     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1492                   self.ENODEVERSION, node,
1493                   "software version mismatch: master %s, node %s",
1494                   constants.RELEASE_VERSION, remote_version[1],
1495                   code=self.ETYPE_WARNING)
1496
1497     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1498     if isinstance(hyp_result, dict):
1499       for hv_name, hv_result in hyp_result.iteritems():
1500         test = hv_result is not None
1501         _ErrorIf(test, self.ENODEHV, node,
1502                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1503
1504
1505     test = nresult.get(constants.NV_NODESETUP,
1506                            ["Missing NODESETUP results"])
1507     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1508              "; ".join(test))
1509
1510     return True
1511
1512   def _VerifyNodeTime(self, ninfo, nresult,
1513                       nvinfo_starttime, nvinfo_endtime):
1514     """Check the node time.
1515
1516     @type ninfo: L{objects.Node}
1517     @param ninfo: the node to check
1518     @param nresult: the remote results for the node
1519     @param nvinfo_starttime: the start time of the RPC call
1520     @param nvinfo_endtime: the end time of the RPC call
1521
1522     """
1523     node = ninfo.name
1524     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1525
1526     ntime = nresult.get(constants.NV_TIME, None)
1527     try:
1528       ntime_merged = utils.MergeTime(ntime)
1529     except (ValueError, TypeError):
1530       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1531       return
1532
1533     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1534       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1535     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1536       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1537     else:
1538       ntime_diff = None
1539
1540     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1541              "Node time diverges by at least %s from master node time",
1542              ntime_diff)
1543
1544   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1545     """Check the node time.
1546
1547     @type ninfo: L{objects.Node}
1548     @param ninfo: the node to check
1549     @param nresult: the remote results for the node
1550     @param vg_name: the configured VG name
1551
1552     """
1553     if vg_name is None:
1554       return
1555
1556     node = ninfo.name
1557     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1558
1559     # checks vg existence and size > 20G
1560     vglist = nresult.get(constants.NV_VGLIST, None)
1561     test = not vglist
1562     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1563     if not test:
1564       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1565                                             constants.MIN_VG_SIZE)
1566       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1567
1568     # check pv names
1569     pvlist = nresult.get(constants.NV_PVLIST, None)
1570     test = pvlist is None
1571     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1572     if not test:
1573       # check that ':' is not present in PV names, since it's a
1574       # special character for lvcreate (denotes the range of PEs to
1575       # use on the PV)
1576       for _, pvname, owner_vg in pvlist:
1577         test = ":" in pvname
1578         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1579                  " '%s' of VG '%s'", pvname, owner_vg)
1580
1581   def _VerifyNodeNetwork(self, ninfo, nresult):
1582     """Check the node time.
1583
1584     @type ninfo: L{objects.Node}
1585     @param ninfo: the node to check
1586     @param nresult: the remote results for the node
1587
1588     """
1589     node = ninfo.name
1590     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1591
1592     test = constants.NV_NODELIST not in nresult
1593     _ErrorIf(test, self.ENODESSH, node,
1594              "node hasn't returned node ssh connectivity data")
1595     if not test:
1596       if nresult[constants.NV_NODELIST]:
1597         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1598           _ErrorIf(True, self.ENODESSH, node,
1599                    "ssh communication with node '%s': %s", a_node, a_msg)
1600
1601     test = constants.NV_NODENETTEST not in nresult
1602     _ErrorIf(test, self.ENODENET, node,
1603              "node hasn't returned node tcp connectivity data")
1604     if not test:
1605       if nresult[constants.NV_NODENETTEST]:
1606         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1607         for anode in nlist:
1608           _ErrorIf(True, self.ENODENET, node,
1609                    "tcp communication with node '%s': %s",
1610                    anode, nresult[constants.NV_NODENETTEST][anode])
1611
1612     test = constants.NV_MASTERIP not in nresult
1613     _ErrorIf(test, self.ENODENET, node,
1614              "node hasn't returned node master IP reachability data")
1615     if not test:
1616       if not nresult[constants.NV_MASTERIP]:
1617         if node == self.master_node:
1618           msg = "the master node cannot reach the master IP (not configured?)"
1619         else:
1620           msg = "cannot reach the master IP"
1621         _ErrorIf(True, self.ENODENET, node, msg)
1622
1623
1624   def _VerifyInstance(self, instance, instanceconfig, node_image):
1625     """Verify an instance.
1626
1627     This function checks to see if the required block devices are
1628     available on the instance's node.
1629
1630     """
1631     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1632     node_current = instanceconfig.primary_node
1633
1634     node_vol_should = {}
1635     instanceconfig.MapLVsByNode(node_vol_should)
1636
1637     for node in node_vol_should:
1638       n_img = node_image[node]
1639       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1640         # ignore missing volumes on offline or broken nodes
1641         continue
1642       for volume in node_vol_should[node]:
1643         test = volume not in n_img.volumes
1644         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1645                  "volume %s missing on node %s", volume, node)
1646
1647     if instanceconfig.admin_up:
1648       pri_img = node_image[node_current]
1649       test = instance not in pri_img.instances and not pri_img.offline
1650       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1651                "instance not running on its primary node %s",
1652                node_current)
1653
1654     for node, n_img in node_image.items():
1655       if (not node == node_current):
1656         test = instance in n_img.instances
1657         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1658                  "instance should not run on node %s", node)
1659
1660   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1661     """Verify if there are any unknown volumes in the cluster.
1662
1663     The .os, .swap and backup volumes are ignored. All other volumes are
1664     reported as unknown.
1665
1666     @type reserved: L{ganeti.utils.FieldSet}
1667     @param reserved: a FieldSet of reserved volume names
1668
1669     """
1670     for node, n_img in node_image.items():
1671       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1672         # skip non-healthy nodes
1673         continue
1674       for volume in n_img.volumes:
1675         test = ((node not in node_vol_should or
1676                 volume not in node_vol_should[node]) and
1677                 not reserved.Matches(volume))
1678         self._ErrorIf(test, self.ENODEORPHANLV, node,
1679                       "volume %s is unknown", volume)
1680
1681   def _VerifyOrphanInstances(self, instancelist, node_image):
1682     """Verify the list of running instances.
1683
1684     This checks what instances are running but unknown to the cluster.
1685
1686     """
1687     for node, n_img in node_image.items():
1688       for o_inst in n_img.instances:
1689         test = o_inst not in instancelist
1690         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1691                       "instance %s on node %s should not exist", o_inst, node)
1692
1693   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1694     """Verify N+1 Memory Resilience.
1695
1696     Check that if one single node dies we can still start all the
1697     instances it was primary for.
1698
1699     """
1700     for node, n_img in node_image.items():
1701       # This code checks that every node which is now listed as
1702       # secondary has enough memory to host all instances it is
1703       # supposed to should a single other node in the cluster fail.
1704       # FIXME: not ready for failover to an arbitrary node
1705       # FIXME: does not support file-backed instances
1706       # WARNING: we currently take into account down instances as well
1707       # as up ones, considering that even if they're down someone
1708       # might want to start them even in the event of a node failure.
1709       for prinode, instances in n_img.sbp.items():
1710         needed_mem = 0
1711         for instance in instances:
1712           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1713           if bep[constants.BE_AUTO_BALANCE]:
1714             needed_mem += bep[constants.BE_MEMORY]
1715         test = n_img.mfree < needed_mem
1716         self._ErrorIf(test, self.ENODEN1, node,
1717                       "not enough memory on to accommodate"
1718                       " failovers should peer node %s fail", prinode)
1719
1720   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1721                        master_files):
1722     """Verifies and computes the node required file checksums.
1723
1724     @type ninfo: L{objects.Node}
1725     @param ninfo: the node to check
1726     @param nresult: the remote results for the node
1727     @param file_list: required list of files
1728     @param local_cksum: dictionary of local files and their checksums
1729     @param master_files: list of files that only masters should have
1730
1731     """
1732     node = ninfo.name
1733     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1734
1735     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1736     test = not isinstance(remote_cksum, dict)
1737     _ErrorIf(test, self.ENODEFILECHECK, node,
1738              "node hasn't returned file checksum data")
1739     if test:
1740       return
1741
1742     for file_name in file_list:
1743       node_is_mc = ninfo.master_candidate
1744       must_have = (file_name not in master_files) or node_is_mc
1745       # missing
1746       test1 = file_name not in remote_cksum
1747       # invalid checksum
1748       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1749       # existing and good
1750       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1751       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1752                "file '%s' missing", file_name)
1753       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1754                "file '%s' has wrong checksum", file_name)
1755       # not candidate and this is not a must-have file
1756       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1757                "file '%s' should not exist on non master"
1758                " candidates (and the file is outdated)", file_name)
1759       # all good, except non-master/non-must have combination
1760       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1761                "file '%s' should not exist"
1762                " on non master candidates", file_name)
1763
1764   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1765                       drbd_map):
1766     """Verifies and the node DRBD status.
1767
1768     @type ninfo: L{objects.Node}
1769     @param ninfo: the node to check
1770     @param nresult: the remote results for the node
1771     @param instanceinfo: the dict of instances
1772     @param drbd_helper: the configured DRBD usermode helper
1773     @param drbd_map: the DRBD map as returned by
1774         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1775
1776     """
1777     node = ninfo.name
1778     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1779
1780     if drbd_helper:
1781       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1782       test = (helper_result == None)
1783       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1784                "no drbd usermode helper returned")
1785       if helper_result:
1786         status, payload = helper_result
1787         test = not status
1788         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1789                  "drbd usermode helper check unsuccessful: %s", payload)
1790         test = status and (payload != drbd_helper)
1791         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1792                  "wrong drbd usermode helper: %s", payload)
1793
1794     # compute the DRBD minors
1795     node_drbd = {}
1796     for minor, instance in drbd_map[node].items():
1797       test = instance not in instanceinfo
1798       _ErrorIf(test, self.ECLUSTERCFG, None,
1799                "ghost instance '%s' in temporary DRBD map", instance)
1800         # ghost instance should not be running, but otherwise we
1801         # don't give double warnings (both ghost instance and
1802         # unallocated minor in use)
1803       if test:
1804         node_drbd[minor] = (instance, False)
1805       else:
1806         instance = instanceinfo[instance]
1807         node_drbd[minor] = (instance.name, instance.admin_up)
1808
1809     # and now check them
1810     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1811     test = not isinstance(used_minors, (tuple, list))
1812     _ErrorIf(test, self.ENODEDRBD, node,
1813              "cannot parse drbd status file: %s", str(used_minors))
1814     if test:
1815       # we cannot check drbd status
1816       return
1817
1818     for minor, (iname, must_exist) in node_drbd.items():
1819       test = minor not in used_minors and must_exist
1820       _ErrorIf(test, self.ENODEDRBD, node,
1821                "drbd minor %d of instance %s is not active", minor, iname)
1822     for minor in used_minors:
1823       test = minor not in node_drbd
1824       _ErrorIf(test, self.ENODEDRBD, node,
1825                "unallocated drbd minor %d is in use", minor)
1826
1827   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1828     """Builds the node OS structures.
1829
1830     @type ninfo: L{objects.Node}
1831     @param ninfo: the node to check
1832     @param nresult: the remote results for the node
1833     @param nimg: the node image object
1834
1835     """
1836     node = ninfo.name
1837     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1838
1839     remote_os = nresult.get(constants.NV_OSLIST, None)
1840     test = (not isinstance(remote_os, list) or
1841             not compat.all(isinstance(v, list) and len(v) == 7
1842                            for v in remote_os))
1843
1844     _ErrorIf(test, self.ENODEOS, node,
1845              "node hasn't returned valid OS data")
1846
1847     nimg.os_fail = test
1848
1849     if test:
1850       return
1851
1852     os_dict = {}
1853
1854     for (name, os_path, status, diagnose,
1855          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1856
1857       if name not in os_dict:
1858         os_dict[name] = []
1859
1860       # parameters is a list of lists instead of list of tuples due to
1861       # JSON lacking a real tuple type, fix it:
1862       parameters = [tuple(v) for v in parameters]
1863       os_dict[name].append((os_path, status, diagnose,
1864                             set(variants), set(parameters), set(api_ver)))
1865
1866     nimg.oslist = os_dict
1867
1868   def _VerifyNodeOS(self, ninfo, nimg, base):
1869     """Verifies the node OS list.
1870
1871     @type ninfo: L{objects.Node}
1872     @param ninfo: the node to check
1873     @param nimg: the node image object
1874     @param base: the 'template' node we match against (e.g. from the master)
1875
1876     """
1877     node = ninfo.name
1878     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1879
1880     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1881
1882     for os_name, os_data in nimg.oslist.items():
1883       assert os_data, "Empty OS status for OS %s?!" % os_name
1884       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1885       _ErrorIf(not f_status, self.ENODEOS, node,
1886                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1887       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1888                "OS '%s' has multiple entries (first one shadows the rest): %s",
1889                os_name, utils.CommaJoin([v[0] for v in os_data]))
1890       # this will catched in backend too
1891       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1892                and not f_var, self.ENODEOS, node,
1893                "OS %s with API at least %d does not declare any variant",
1894                os_name, constants.OS_API_V15)
1895       # comparisons with the 'base' image
1896       test = os_name not in base.oslist
1897       _ErrorIf(test, self.ENODEOS, node,
1898                "Extra OS %s not present on reference node (%s)",
1899                os_name, base.name)
1900       if test:
1901         continue
1902       assert base.oslist[os_name], "Base node has empty OS status?"
1903       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1904       if not b_status:
1905         # base OS is invalid, skipping
1906         continue
1907       for kind, a, b in [("API version", f_api, b_api),
1908                          ("variants list", f_var, b_var),
1909                          ("parameters", f_param, b_param)]:
1910         _ErrorIf(a != b, self.ENODEOS, node,
1911                  "OS %s %s differs from reference node %s: %s vs. %s",
1912                  kind, os_name, base.name,
1913                  utils.CommaJoin(a), utils.CommaJoin(b))
1914
1915     # check any missing OSes
1916     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1917     _ErrorIf(missing, self.ENODEOS, node,
1918              "OSes present on reference node %s but missing on this node: %s",
1919              base.name, utils.CommaJoin(missing))
1920
1921   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1922     """Verifies and updates the node volume data.
1923
1924     This function will update a L{NodeImage}'s internal structures
1925     with data from the remote call.
1926
1927     @type ninfo: L{objects.Node}
1928     @param ninfo: the node to check
1929     @param nresult: the remote results for the node
1930     @param nimg: the node image object
1931     @param vg_name: the configured VG name
1932
1933     """
1934     node = ninfo.name
1935     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1936
1937     nimg.lvm_fail = True
1938     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1939     if vg_name is None:
1940       pass
1941     elif isinstance(lvdata, basestring):
1942       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1943                utils.SafeEncode(lvdata))
1944     elif not isinstance(lvdata, dict):
1945       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1946     else:
1947       nimg.volumes = lvdata
1948       nimg.lvm_fail = False
1949
1950   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1951     """Verifies and updates the node instance list.
1952
1953     If the listing was successful, then updates this node's instance
1954     list. Otherwise, it marks the RPC call as failed for the instance
1955     list key.
1956
1957     @type ninfo: L{objects.Node}
1958     @param ninfo: the node to check
1959     @param nresult: the remote results for the node
1960     @param nimg: the node image object
1961
1962     """
1963     idata = nresult.get(constants.NV_INSTANCELIST, None)
1964     test = not isinstance(idata, list)
1965     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1966                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1967     if test:
1968       nimg.hyp_fail = True
1969     else:
1970       nimg.instances = idata
1971
1972   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1973     """Verifies and computes a node information map
1974
1975     @type ninfo: L{objects.Node}
1976     @param ninfo: the node to check
1977     @param nresult: the remote results for the node
1978     @param nimg: the node image object
1979     @param vg_name: the configured VG name
1980
1981     """
1982     node = ninfo.name
1983     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1984
1985     # try to read free memory (from the hypervisor)
1986     hv_info = nresult.get(constants.NV_HVINFO, None)
1987     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1988     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1989     if not test:
1990       try:
1991         nimg.mfree = int(hv_info["memory_free"])
1992       except (ValueError, TypeError):
1993         _ErrorIf(True, self.ENODERPC, node,
1994                  "node returned invalid nodeinfo, check hypervisor")
1995
1996     # FIXME: devise a free space model for file based instances as well
1997     if vg_name is not None:
1998       test = (constants.NV_VGLIST not in nresult or
1999               vg_name not in nresult[constants.NV_VGLIST])
2000       _ErrorIf(test, self.ENODELVM, node,
2001                "node didn't return data for the volume group '%s'"
2002                " - it is either missing or broken", vg_name)
2003       if not test:
2004         try:
2005           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2006         except (ValueError, TypeError):
2007           _ErrorIf(True, self.ENODERPC, node,
2008                    "node returned invalid LVM info, check LVM status")
2009
2010   def BuildHooksEnv(self):
2011     """Build hooks env.
2012
2013     Cluster-Verify hooks just ran in the post phase and their failure makes
2014     the output be logged in the verify output and the verification to fail.
2015
2016     """
2017     all_nodes = self.cfg.GetNodeList()
2018     env = {
2019       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2020       }
2021     for node in self.cfg.GetAllNodesInfo().values():
2022       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2023
2024     return env, [], all_nodes
2025
2026   def Exec(self, feedback_fn):
2027     """Verify integrity of cluster, performing various test on nodes.
2028
2029     """
2030     self.bad = False
2031     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2032     verbose = self.op.verbose
2033     self._feedback_fn = feedback_fn
2034     feedback_fn("* Verifying global settings")
2035     for msg in self.cfg.VerifyConfig():
2036       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2037
2038     # Check the cluster certificates
2039     for cert_filename in constants.ALL_CERT_FILES:
2040       (errcode, msg) = _VerifyCertificate(cert_filename)
2041       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2042
2043     vg_name = self.cfg.GetVGName()
2044     drbd_helper = self.cfg.GetDRBDHelper()
2045     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2046     cluster = self.cfg.GetClusterInfo()
2047     nodelist = utils.NiceSort(self.cfg.GetNodeList())
2048     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2049     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2050     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2051                         for iname in instancelist)
2052     i_non_redundant = [] # Non redundant instances
2053     i_non_a_balanced = [] # Non auto-balanced instances
2054     n_offline = 0 # Count of offline nodes
2055     n_drained = 0 # Count of nodes being drained
2056     node_vol_should = {}
2057
2058     # FIXME: verify OS list
2059     # do local checksums
2060     master_files = [constants.CLUSTER_CONF_FILE]
2061     master_node = self.master_node = self.cfg.GetMasterNode()
2062     master_ip = self.cfg.GetMasterIP()
2063
2064     file_names = ssconf.SimpleStore().GetFileList()
2065     file_names.extend(constants.ALL_CERT_FILES)
2066     file_names.extend(master_files)
2067     if cluster.modify_etc_hosts:
2068       file_names.append(constants.ETC_HOSTS)
2069
2070     local_checksums = utils.FingerprintFiles(file_names)
2071
2072     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2073     node_verify_param = {
2074       constants.NV_FILELIST: file_names,
2075       constants.NV_NODELIST: [node.name for node in nodeinfo
2076                               if not node.offline],
2077       constants.NV_HYPERVISOR: hypervisors,
2078       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2079                                   node.secondary_ip) for node in nodeinfo
2080                                  if not node.offline],
2081       constants.NV_INSTANCELIST: hypervisors,
2082       constants.NV_VERSION: None,
2083       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2084       constants.NV_NODESETUP: None,
2085       constants.NV_TIME: None,
2086       constants.NV_MASTERIP: (master_node, master_ip),
2087       constants.NV_OSLIST: None,
2088       }
2089
2090     if vg_name is not None:
2091       node_verify_param[constants.NV_VGLIST] = None
2092       node_verify_param[constants.NV_LVLIST] = vg_name
2093       node_verify_param[constants.NV_PVLIST] = [vg_name]
2094       node_verify_param[constants.NV_DRBDLIST] = None
2095
2096     if drbd_helper:
2097       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2098
2099     # Build our expected cluster state
2100     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2101                                                  name=node.name))
2102                       for node in nodeinfo)
2103
2104     for instance in instancelist:
2105       inst_config = instanceinfo[instance]
2106
2107       for nname in inst_config.all_nodes:
2108         if nname not in node_image:
2109           # ghost node
2110           gnode = self.NodeImage(name=nname)
2111           gnode.ghost = True
2112           node_image[nname] = gnode
2113
2114       inst_config.MapLVsByNode(node_vol_should)
2115
2116       pnode = inst_config.primary_node
2117       node_image[pnode].pinst.append(instance)
2118
2119       for snode in inst_config.secondary_nodes:
2120         nimg = node_image[snode]
2121         nimg.sinst.append(instance)
2122         if pnode not in nimg.sbp:
2123           nimg.sbp[pnode] = []
2124         nimg.sbp[pnode].append(instance)
2125
2126     # At this point, we have the in-memory data structures complete,
2127     # except for the runtime information, which we'll gather next
2128
2129     # Due to the way our RPC system works, exact response times cannot be
2130     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2131     # time before and after executing the request, we can at least have a time
2132     # window.
2133     nvinfo_starttime = time.time()
2134     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2135                                            self.cfg.GetClusterName())
2136     nvinfo_endtime = time.time()
2137
2138     all_drbd_map = self.cfg.ComputeDRBDMap()
2139
2140     feedback_fn("* Verifying node status")
2141
2142     refos_img = None
2143
2144     for node_i in nodeinfo:
2145       node = node_i.name
2146       nimg = node_image[node]
2147
2148       if node_i.offline:
2149         if verbose:
2150           feedback_fn("* Skipping offline node %s" % (node,))
2151         n_offline += 1
2152         continue
2153
2154       if node == master_node:
2155         ntype = "master"
2156       elif node_i.master_candidate:
2157         ntype = "master candidate"
2158       elif node_i.drained:
2159         ntype = "drained"
2160         n_drained += 1
2161       else:
2162         ntype = "regular"
2163       if verbose:
2164         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2165
2166       msg = all_nvinfo[node].fail_msg
2167       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2168       if msg:
2169         nimg.rpc_fail = True
2170         continue
2171
2172       nresult = all_nvinfo[node].payload
2173
2174       nimg.call_ok = self._VerifyNode(node_i, nresult)
2175       self._VerifyNodeNetwork(node_i, nresult)
2176       self._VerifyNodeLVM(node_i, nresult, vg_name)
2177       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2178                             master_files)
2179       self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2180                            all_drbd_map)
2181       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2182
2183       self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2184       self._UpdateNodeInstances(node_i, nresult, nimg)
2185       self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2186       self._UpdateNodeOS(node_i, nresult, nimg)
2187       if not nimg.os_fail:
2188         if refos_img is None:
2189           refos_img = nimg
2190         self._VerifyNodeOS(node_i, nimg, refos_img)
2191
2192     feedback_fn("* Verifying instance status")
2193     for instance in instancelist:
2194       if verbose:
2195         feedback_fn("* Verifying instance %s" % instance)
2196       inst_config = instanceinfo[instance]
2197       self._VerifyInstance(instance, inst_config, node_image)
2198       inst_nodes_offline = []
2199
2200       pnode = inst_config.primary_node
2201       pnode_img = node_image[pnode]
2202       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2203                self.ENODERPC, pnode, "instance %s, connection to"
2204                " primary node failed", instance)
2205
2206       if pnode_img.offline:
2207         inst_nodes_offline.append(pnode)
2208
2209       # If the instance is non-redundant we cannot survive losing its primary
2210       # node, so we are not N+1 compliant. On the other hand we have no disk
2211       # templates with more than one secondary so that situation is not well
2212       # supported either.
2213       # FIXME: does not support file-backed instances
2214       if not inst_config.secondary_nodes:
2215         i_non_redundant.append(instance)
2216       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2217                instance, "instance has multiple secondary nodes: %s",
2218                utils.CommaJoin(inst_config.secondary_nodes),
2219                code=self.ETYPE_WARNING)
2220
2221       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2222         i_non_a_balanced.append(instance)
2223
2224       for snode in inst_config.secondary_nodes:
2225         s_img = node_image[snode]
2226         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2227                  "instance %s, connection to secondary node failed", instance)
2228
2229         if s_img.offline:
2230           inst_nodes_offline.append(snode)
2231
2232       # warn that the instance lives on offline nodes
2233       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2234                "instance lives on offline node(s) %s",
2235                utils.CommaJoin(inst_nodes_offline))
2236       # ... or ghost nodes
2237       for node in inst_config.all_nodes:
2238         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2239                  "instance lives on ghost node %s", node)
2240
2241     feedback_fn("* Verifying orphan volumes")
2242     reserved = utils.FieldSet(*cluster.reserved_lvs)
2243     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2244
2245     feedback_fn("* Verifying orphan instances")
2246     self._VerifyOrphanInstances(instancelist, node_image)
2247
2248     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2249       feedback_fn("* Verifying N+1 Memory redundancy")
2250       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2251
2252     feedback_fn("* Other Notes")
2253     if i_non_redundant:
2254       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2255                   % len(i_non_redundant))
2256
2257     if i_non_a_balanced:
2258       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2259                   % len(i_non_a_balanced))
2260
2261     if n_offline:
2262       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2263
2264     if n_drained:
2265       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2266
2267     return not self.bad
2268
2269   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2270     """Analyze the post-hooks' result
2271
2272     This method analyses the hook result, handles it, and sends some
2273     nicely-formatted feedback back to the user.
2274
2275     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2276         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2277     @param hooks_results: the results of the multi-node hooks rpc call
2278     @param feedback_fn: function used send feedback back to the caller
2279     @param lu_result: previous Exec result
2280     @return: the new Exec result, based on the previous result
2281         and hook results
2282
2283     """
2284     # We only really run POST phase hooks, and are only interested in
2285     # their results
2286     if phase == constants.HOOKS_PHASE_POST:
2287       # Used to change hooks' output to proper indentation
2288       indent_re = re.compile('^', re.M)
2289       feedback_fn("* Hooks Results")
2290       assert hooks_results, "invalid result from hooks"
2291
2292       for node_name in hooks_results:
2293         res = hooks_results[node_name]
2294         msg = res.fail_msg
2295         test = msg and not res.offline
2296         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2297                       "Communication failure in hooks execution: %s", msg)
2298         if res.offline or msg:
2299           # No need to investigate payload if node is offline or gave an error.
2300           # override manually lu_result here as _ErrorIf only
2301           # overrides self.bad
2302           lu_result = 1
2303           continue
2304         for script, hkr, output in res.payload:
2305           test = hkr == constants.HKR_FAIL
2306           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2307                         "Script %s failed, output:", script)
2308           if test:
2309             output = indent_re.sub('      ', output)
2310             feedback_fn("%s" % output)
2311             lu_result = 0
2312
2313       return lu_result
2314
2315
2316 class LUVerifyDisks(NoHooksLU):
2317   """Verifies the cluster disks status.
2318
2319   """
2320   REQ_BGL = False
2321
2322   def ExpandNames(self):
2323     self.needed_locks = {
2324       locking.LEVEL_NODE: locking.ALL_SET,
2325       locking.LEVEL_INSTANCE: locking.ALL_SET,
2326     }
2327     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2328
2329   def Exec(self, feedback_fn):
2330     """Verify integrity of cluster disks.
2331
2332     @rtype: tuple of three items
2333     @return: a tuple of (dict of node-to-node_error, list of instances
2334         which need activate-disks, dict of instance: (node, volume) for
2335         missing volumes
2336
2337     """
2338     result = res_nodes, res_instances, res_missing = {}, [], {}
2339
2340     vg_name = self.cfg.GetVGName()
2341     nodes = utils.NiceSort(self.cfg.GetNodeList())
2342     instances = [self.cfg.GetInstanceInfo(name)
2343                  for name in self.cfg.GetInstanceList()]
2344
2345     nv_dict = {}
2346     for inst in instances:
2347       inst_lvs = {}
2348       if (not inst.admin_up or
2349           inst.disk_template not in constants.DTS_NET_MIRROR):
2350         continue
2351       inst.MapLVsByNode(inst_lvs)
2352       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2353       for node, vol_list in inst_lvs.iteritems():
2354         for vol in vol_list:
2355           nv_dict[(node, vol)] = inst
2356
2357     if not nv_dict:
2358       return result
2359
2360     node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2361
2362     for node in nodes:
2363       # node_volume
2364       node_res = node_lvs[node]
2365       if node_res.offline:
2366         continue
2367       msg = node_res.fail_msg
2368       if msg:
2369         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2370         res_nodes[node] = msg
2371         continue
2372
2373       lvs = node_res.payload
2374       for lv_name, (_, _, lv_online) in lvs.items():
2375         inst = nv_dict.pop((node, lv_name), None)
2376         if (not lv_online and inst is not None
2377             and inst.name not in res_instances):
2378           res_instances.append(inst.name)
2379
2380     # any leftover items in nv_dict are missing LVs, let's arrange the
2381     # data better
2382     for key, inst in nv_dict.iteritems():
2383       if inst.name not in res_missing:
2384         res_missing[inst.name] = []
2385       res_missing[inst.name].append(key)
2386
2387     return result
2388
2389
2390 class LURepairDiskSizes(NoHooksLU):
2391   """Verifies the cluster disks sizes.
2392
2393   """
2394   _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2395   REQ_BGL = False
2396
2397   def ExpandNames(self):
2398     if self.op.instances:
2399       self.wanted_names = []
2400       for name in self.op.instances:
2401         full_name = _ExpandInstanceName(self.cfg, name)
2402         self.wanted_names.append(full_name)
2403       self.needed_locks = {
2404         locking.LEVEL_NODE: [],
2405         locking.LEVEL_INSTANCE: self.wanted_names,
2406         }
2407       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2408     else:
2409       self.wanted_names = None
2410       self.needed_locks = {
2411         locking.LEVEL_NODE: locking.ALL_SET,
2412         locking.LEVEL_INSTANCE: locking.ALL_SET,
2413         }
2414     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2415
2416   def DeclareLocks(self, level):
2417     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2418       self._LockInstancesNodes(primary_only=True)
2419
2420   def CheckPrereq(self):
2421     """Check prerequisites.
2422
2423     This only checks the optional instance list against the existing names.
2424
2425     """
2426     if self.wanted_names is None:
2427       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2428
2429     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2430                              in self.wanted_names]
2431
2432   def _EnsureChildSizes(self, disk):
2433     """Ensure children of the disk have the needed disk size.
2434
2435     This is valid mainly for DRBD8 and fixes an issue where the
2436     children have smaller disk size.
2437
2438     @param disk: an L{ganeti.objects.Disk} object
2439
2440     """
2441     if disk.dev_type == constants.LD_DRBD8:
2442       assert disk.children, "Empty children for DRBD8?"
2443       fchild = disk.children[0]
2444       mismatch = fchild.size < disk.size
2445       if mismatch:
2446         self.LogInfo("Child disk has size %d, parent %d, fixing",
2447                      fchild.size, disk.size)
2448         fchild.size = disk.size
2449
2450       # and we recurse on this child only, not on the metadev
2451       return self._EnsureChildSizes(fchild) or mismatch
2452     else:
2453       return False
2454
2455   def Exec(self, feedback_fn):
2456     """Verify the size of cluster disks.
2457
2458     """
2459     # TODO: check child disks too
2460     # TODO: check differences in size between primary/secondary nodes
2461     per_node_disks = {}
2462     for instance in self.wanted_instances:
2463       pnode = instance.primary_node
2464       if pnode not in per_node_disks:
2465         per_node_disks[pnode] = []
2466       for idx, disk in enumerate(instance.disks):
2467         per_node_disks[pnode].append((instance, idx, disk))
2468
2469     changed = []
2470     for node, dskl in per_node_disks.items():
2471       newl = [v[2].Copy() for v in dskl]
2472       for dsk in newl:
2473         self.cfg.SetDiskID(dsk, node)
2474       result = self.rpc.call_blockdev_getsizes(node, newl)
2475       if result.fail_msg:
2476         self.LogWarning("Failure in blockdev_getsizes call to node"
2477                         " %s, ignoring", node)
2478         continue
2479       if len(result.data) != len(dskl):
2480         self.LogWarning("Invalid result from node %s, ignoring node results",
2481                         node)
2482         continue
2483       for ((instance, idx, disk), size) in zip(dskl, result.data):
2484         if size is None:
2485           self.LogWarning("Disk %d of instance %s did not return size"
2486                           " information, ignoring", idx, instance.name)
2487           continue
2488         if not isinstance(size, (int, long)):
2489           self.LogWarning("Disk %d of instance %s did not return valid"
2490                           " size information, ignoring", idx, instance.name)
2491           continue
2492         size = size >> 20
2493         if size != disk.size:
2494           self.LogInfo("Disk %d of instance %s has mismatched size,"
2495                        " correcting: recorded %d, actual %d", idx,
2496                        instance.name, disk.size, size)
2497           disk.size = size
2498           self.cfg.Update(instance, feedback_fn)
2499           changed.append((instance.name, idx, size))
2500         if self._EnsureChildSizes(disk):
2501           self.cfg.Update(instance, feedback_fn)
2502           changed.append((instance.name, idx, disk.size))
2503     return changed
2504
2505
2506 class LURenameCluster(LogicalUnit):
2507   """Rename the cluster.
2508
2509   """
2510   HPATH = "cluster-rename"
2511   HTYPE = constants.HTYPE_CLUSTER
2512   _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2513
2514   def BuildHooksEnv(self):
2515     """Build hooks env.
2516
2517     """
2518     env = {
2519       "OP_TARGET": self.cfg.GetClusterName(),
2520       "NEW_NAME": self.op.name,
2521       }
2522     mn = self.cfg.GetMasterNode()
2523     all_nodes = self.cfg.GetNodeList()
2524     return env, [mn], all_nodes
2525
2526   def CheckPrereq(self):
2527     """Verify that the passed name is a valid one.
2528
2529     """
2530     hostname = netutils.GetHostInfo(self.op.name)
2531
2532     new_name = hostname.name
2533     self.ip = new_ip = hostname.ip
2534     old_name = self.cfg.GetClusterName()
2535     old_ip = self.cfg.GetMasterIP()
2536     if new_name == old_name and new_ip == old_ip:
2537       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2538                                  " cluster has changed",
2539                                  errors.ECODE_INVAL)
2540     if new_ip != old_ip:
2541       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2542         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2543                                    " reachable on the network. Aborting." %
2544                                    new_ip, errors.ECODE_NOTUNIQUE)
2545
2546     self.op.name = new_name
2547
2548   def Exec(self, feedback_fn):
2549     """Rename the cluster.
2550
2551     """
2552     clustername = self.op.name
2553     ip = self.ip
2554
2555     # shutdown the master IP
2556     master = self.cfg.GetMasterNode()
2557     result = self.rpc.call_node_stop_master(master, False)
2558     result.Raise("Could not disable the master role")
2559
2560     try:
2561       cluster = self.cfg.GetClusterInfo()
2562       cluster.cluster_name = clustername
2563       cluster.master_ip = ip
2564       self.cfg.Update(cluster, feedback_fn)
2565
2566       # update the known hosts file
2567       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2568       node_list = self.cfg.GetNodeList()
2569       try:
2570         node_list.remove(master)
2571       except ValueError:
2572         pass
2573       result = self.rpc.call_upload_file(node_list,
2574                                          constants.SSH_KNOWN_HOSTS_FILE)
2575       for to_node, to_result in result.iteritems():
2576         msg = to_result.fail_msg
2577         if msg:
2578           msg = ("Copy of file %s to node %s failed: %s" %
2579                  (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2580           self.proc.LogWarning(msg)
2581
2582     finally:
2583       result = self.rpc.call_node_start_master(master, False, False)
2584       msg = result.fail_msg
2585       if msg:
2586         self.LogWarning("Could not re-enable the master role on"
2587                         " the master, please restart manually: %s", msg)
2588
2589     return clustername
2590
2591
2592 class LUSetClusterParams(LogicalUnit):
2593   """Change the parameters of the cluster.
2594
2595   """
2596   HPATH = "cluster-modify"
2597   HTYPE = constants.HTYPE_CLUSTER
2598   _OP_PARAMS = [
2599     ("vg_name", None, _TMaybeString),
2600     ("enabled_hypervisors", None,
2601      _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2602     ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2603     ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2604     ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2605     ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2606     ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2607     ("uid_pool", None, _NoType),
2608     ("add_uids", None, _NoType),
2609     ("remove_uids", None, _NoType),
2610     ("maintain_node_health", None, _TMaybeBool),
2611     ("nicparams", None, _TOr(_TDict, _TNone)),
2612     ("drbd_helper", None, _TOr(_TString, _TNone)),
2613     ("default_iallocator", None, _TMaybeString),
2614     ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
2615     ]
2616   REQ_BGL = False
2617
2618   def CheckArguments(self):
2619     """Check parameters
2620
2621     """
2622     if self.op.uid_pool:
2623       uidpool.CheckUidPool(self.op.uid_pool)
2624
2625     if self.op.add_uids:
2626       uidpool.CheckUidPool(self.op.add_uids)
2627
2628     if self.op.remove_uids:
2629       uidpool.CheckUidPool(self.op.remove_uids)
2630
2631   def ExpandNames(self):
2632     # FIXME: in the future maybe other cluster params won't require checking on
2633     # all nodes to be modified.
2634     self.needed_locks = {
2635       locking.LEVEL_NODE: locking.ALL_SET,
2636     }
2637     self.share_locks[locking.LEVEL_NODE] = 1
2638
2639   def BuildHooksEnv(self):
2640     """Build hooks env.
2641
2642     """
2643     env = {
2644       "OP_TARGET": self.cfg.GetClusterName(),
2645       "NEW_VG_NAME": self.op.vg_name,
2646       }
2647     mn = self.cfg.GetMasterNode()
2648     return env, [mn], [mn]
2649
2650   def CheckPrereq(self):
2651     """Check prerequisites.
2652
2653     This checks whether the given params don't conflict and
2654     if the given volume group is valid.
2655
2656     """
2657     if self.op.vg_name is not None and not self.op.vg_name:
2658       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2659         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2660                                    " instances exist", errors.ECODE_INVAL)
2661
2662     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2663       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2664         raise errors.OpPrereqError("Cannot disable drbd helper while"
2665                                    " drbd-based instances exist",
2666                                    errors.ECODE_INVAL)
2667
2668     node_list = self.acquired_locks[locking.LEVEL_NODE]
2669
2670     # if vg_name not None, checks given volume group on all nodes
2671     if self.op.vg_name:
2672       vglist = self.rpc.call_vg_list(node_list)
2673       for node in node_list:
2674         msg = vglist[node].fail_msg
2675         if msg:
2676           # ignoring down node
2677           self.LogWarning("Error while gathering data on node %s"
2678                           " (ignoring node): %s", node, msg)
2679           continue
2680         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2681                                               self.op.vg_name,
2682                                               constants.MIN_VG_SIZE)
2683         if vgstatus:
2684           raise errors.OpPrereqError("Error on node '%s': %s" %
2685                                      (node, vgstatus), errors.ECODE_ENVIRON)
2686
2687     if self.op.drbd_helper:
2688       # checks given drbd helper on all nodes
2689       helpers = self.rpc.call_drbd_helper(node_list)
2690       for node in node_list:
2691         ninfo = self.cfg.GetNodeInfo(node)
2692         if ninfo.offline:
2693           self.LogInfo("Not checking drbd helper on offline node %s", node)
2694           continue
2695         msg = helpers[node].fail_msg
2696         if msg:
2697           raise errors.OpPrereqError("Error checking drbd helper on node"
2698                                      " '%s': %s" % (node, msg),
2699                                      errors.ECODE_ENVIRON)
2700         node_helper = helpers[node].payload
2701         if node_helper != self.op.drbd_helper:
2702           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2703                                      (node, node_helper), errors.ECODE_ENVIRON)
2704
2705     self.cluster = cluster = self.cfg.GetClusterInfo()
2706     # validate params changes
2707     if self.op.beparams:
2708       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2709       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2710
2711     if self.op.nicparams:
2712       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2713       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2714       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2715       nic_errors = []
2716
2717       # check all instances for consistency
2718       for instance in self.cfg.GetAllInstancesInfo().values():
2719         for nic_idx, nic in enumerate(instance.nics):
2720           params_copy = copy.deepcopy(nic.nicparams)
2721           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2722
2723           # check parameter syntax
2724           try:
2725             objects.NIC.CheckParameterSyntax(params_filled)
2726           except errors.ConfigurationError, err:
2727             nic_errors.append("Instance %s, nic/%d: %s" %
2728                               (instance.name, nic_idx, err))
2729
2730           # if we're moving instances to routed, check that they have an ip
2731           target_mode = params_filled[constants.NIC_MODE]
2732           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2733             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2734                               (instance.name, nic_idx))
2735       if nic_errors:
2736         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2737                                    "\n".join(nic_errors))
2738
2739     # hypervisor list/parameters
2740     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2741     if self.op.hvparams:
2742       for hv_name, hv_dict in self.op.hvparams.items():
2743         if hv_name not in self.new_hvparams:
2744           self.new_hvparams[hv_name] = hv_dict
2745         else:
2746           self.new_hvparams[hv_name].update(hv_dict)
2747
2748     # os hypervisor parameters
2749     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2750     if self.op.os_hvp:
2751       for os_name, hvs in self.op.os_hvp.items():
2752         if os_name not in self.new_os_hvp:
2753           self.new_os_hvp[os_name] = hvs
2754         else:
2755           for hv_name, hv_dict in hvs.items():
2756             if hv_name not in self.new_os_hvp[os_name]:
2757               self.new_os_hvp[os_name][hv_name] = hv_dict
2758             else:
2759               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2760
2761     # os parameters
2762     self.new_osp = objects.FillDict(cluster.osparams, {})
2763     if self.op.osparams:
2764       for os_name, osp in self.op.osparams.items():
2765         if os_name not in self.new_osp:
2766           self.new_osp[os_name] = {}
2767
2768         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2769                                                   use_none=True)
2770
2771         if not self.new_osp[os_name]:
2772           # we removed all parameters
2773           del self.new_osp[os_name]
2774         else:
2775           # check the parameter validity (remote check)
2776           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2777                          os_name, self.new_osp[os_name])
2778
2779     # changes to the hypervisor list
2780     if self.op.enabled_hypervisors is not None:
2781       self.hv_list = self.op.enabled_hypervisors
2782       for hv in self.hv_list:
2783         # if the hypervisor doesn't already exist in the cluster
2784         # hvparams, we initialize it to empty, and then (in both
2785         # cases) we make sure to fill the defaults, as we might not
2786         # have a complete defaults list if the hypervisor wasn't
2787         # enabled before
2788         if hv not in new_hvp:
2789           new_hvp[hv] = {}
2790         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2791         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2792     else:
2793       self.hv_list = cluster.enabled_hypervisors
2794
2795     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2796       # either the enabled list has changed, or the parameters have, validate
2797       for hv_name, hv_params in self.new_hvparams.items():
2798         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2799             (self.op.enabled_hypervisors and
2800              hv_name in self.op.enabled_hypervisors)):
2801           # either this is a new hypervisor, or its parameters have changed
2802           hv_class = hypervisor.GetHypervisor(hv_name)
2803           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2804           hv_class.CheckParameterSyntax(hv_params)
2805           _CheckHVParams(self, node_list, hv_name, hv_params)
2806
2807     if self.op.os_hvp:
2808       # no need to check any newly-enabled hypervisors, since the
2809       # defaults have already been checked in the above code-block
2810       for os_name, os_hvp in self.new_os_hvp.items():
2811         for hv_name, hv_params in os_hvp.items():
2812           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2813           # we need to fill in the new os_hvp on top of the actual hv_p
2814           cluster_defaults = self.new_hvparams.get(hv_name, {})
2815           new_osp = objects.FillDict(cluster_defaults, hv_params)
2816           hv_class = hypervisor.GetHypervisor(hv_name)
2817           hv_class.CheckParameterSyntax(new_osp)
2818           _CheckHVParams(self, node_list, hv_name, new_osp)
2819
2820     if self.op.default_iallocator:
2821       alloc_script = utils.FindFile(self.op.default_iallocator,
2822                                     constants.IALLOCATOR_SEARCH_PATH,
2823                                     os.path.isfile)
2824       if alloc_script is None:
2825         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2826                                    " specified" % self.op.default_iallocator,
2827                                    errors.ECODE_INVAL)
2828
2829   def Exec(self, feedback_fn):
2830     """Change the parameters of the cluster.
2831
2832     """
2833     if self.op.vg_name is not None:
2834       new_volume = self.op.vg_name
2835       if not new_volume:
2836         new_volume = None
2837       if new_volume != self.cfg.GetVGName():
2838         self.cfg.SetVGName(new_volume)
2839       else:
2840         feedback_fn("Cluster LVM configuration already in desired"
2841                     " state, not changing")
2842     if self.op.drbd_helper is not None:
2843       new_helper = self.op.drbd_helper
2844       if not new_helper:
2845         new_helper = None
2846       if new_helper != self.cfg.GetDRBDHelper():
2847         self.cfg.SetDRBDHelper(new_helper)
2848       else:
2849         feedback_fn("Cluster DRBD helper already in desired state,"
2850                     " not changing")
2851     if self.op.hvparams:
2852       self.cluster.hvparams = self.new_hvparams
2853     if self.op.os_hvp:
2854       self.cluster.os_hvp = self.new_os_hvp
2855     if self.op.enabled_hypervisors is not None:
2856       self.cluster.hvparams = self.new_hvparams
2857       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2858     if self.op.beparams:
2859       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2860     if self.op.nicparams:
2861       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2862     if self.op.osparams:
2863       self.cluster.osparams = self.new_osp
2864
2865     if self.op.candidate_pool_size is not None:
2866       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2867       # we need to update the pool size here, otherwise the save will fail
2868       _AdjustCandidatePool(self, [])
2869
2870     if self.op.maintain_node_health is not None:
2871       self.cluster.maintain_node_health = self.op.maintain_node_health
2872
2873     if self.op.add_uids is not None:
2874       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2875
2876     if self.op.remove_uids is not None:
2877       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2878
2879     if self.op.uid_pool is not None:
2880       self.cluster.uid_pool = self.op.uid_pool
2881
2882     if self.op.default_iallocator is not None:
2883       self.cluster.default_iallocator = self.op.default_iallocator
2884
2885     if self.op.reserved_lvs is not None:
2886       self.cluster.reserved_lvs = self.op.reserved_lvs
2887
2888     self.cfg.Update(self.cluster, feedback_fn)
2889
2890
2891 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2892   """Distribute additional files which are part of the cluster configuration.
2893
2894   ConfigWriter takes care of distributing the config and ssconf files, but
2895   there are more files which should be distributed to all nodes. This function
2896   makes sure those are copied.
2897
2898   @param lu: calling logical unit
2899   @param additional_nodes: list of nodes not in the config to distribute to
2900
2901   """
2902   # 1. Gather target nodes
2903   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2904   dist_nodes = lu.cfg.GetOnlineNodeList()
2905   if additional_nodes is not None:
2906     dist_nodes.extend(additional_nodes)
2907   if myself.name in dist_nodes:
2908     dist_nodes.remove(myself.name)
2909
2910   # 2. Gather files to distribute
2911   dist_files = set([constants.ETC_HOSTS,
2912                     constants.SSH_KNOWN_HOSTS_FILE,
2913                     constants.RAPI_CERT_FILE,
2914                     constants.RAPI_USERS_FILE,
2915                     constants.CONFD_HMAC_KEY,
2916                     constants.CLUSTER_DOMAIN_SECRET_FILE,
2917                    ])
2918
2919   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2920   for hv_name in enabled_hypervisors:
2921     hv_class = hypervisor.GetHypervisor(hv_name)
2922     dist_files.update(hv_class.GetAncillaryFiles())
2923
2924   # 3. Perform the files upload
2925   for fname in dist_files:
2926     if os.path.exists(fname):
2927       result = lu.rpc.call_upload_file(dist_nodes, fname)
2928       for to_node, to_result in result.items():
2929         msg = to_result.fail_msg
2930         if msg:
2931           msg = ("Copy of file %s to node %s failed: %s" %
2932                  (fname, to_node, msg))
2933           lu.proc.LogWarning(msg)
2934
2935
2936 class LURedistributeConfig(NoHooksLU):
2937   """Force the redistribution of cluster configuration.
2938
2939   This is a very simple LU.
2940
2941   """
2942   REQ_BGL = False
2943
2944   def ExpandNames(self):
2945     self.needed_locks = {
2946       locking.LEVEL_NODE: locking.ALL_SET,
2947     }
2948     self.share_locks[locking.LEVEL_NODE] = 1
2949
2950   def Exec(self, feedback_fn):
2951     """Redistribute the configuration.
2952
2953     """
2954     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2955     _RedistributeAncillaryFiles(self)
2956
2957
2958 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2959   """Sleep and poll for an instance's disk to sync.
2960
2961   """
2962   if not instance.disks or disks is not None and not disks:
2963     return True
2964
2965   disks = _ExpandCheckDisks(instance, disks)
2966
2967   if not oneshot:
2968     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2969
2970   node = instance.primary_node
2971
2972   for dev in disks:
2973     lu.cfg.SetDiskID(dev, node)
2974
2975   # TODO: Convert to utils.Retry
2976
2977   retries = 0
2978   degr_retries = 10 # in seconds, as we sleep 1 second each time
2979   while True:
2980     max_time = 0
2981     done = True
2982     cumul_degraded = False
2983     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2984     msg = rstats.fail_msg
2985     if msg:
2986       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2987       retries += 1
2988       if retries >= 10:
2989         raise errors.RemoteError("Can't contact node %s for mirror data,"
2990                                  " aborting." % node)
2991       time.sleep(6)
2992       continue
2993     rstats = rstats.payload
2994     retries = 0
2995     for i, mstat in enumerate(rstats):
2996       if mstat is None:
2997         lu.LogWarning("Can't compute data for node %s/%s",
2998                            node, disks[i].iv_name)
2999         continue
3000
3001       cumul_degraded = (cumul_degraded or
3002                         (mstat.is_degraded and mstat.sync_percent is None))
3003       if mstat.sync_percent is not None:
3004         done = False
3005         if mstat.estimated_time is not None:
3006           rem_time = ("%s remaining (estimated)" %
3007                       utils.FormatSeconds(mstat.estimated_time))
3008           max_time = mstat.estimated_time
3009         else:
3010           rem_time = "no time estimate"
3011         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3012                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3013
3014     # if we're done but degraded, let's do a few small retries, to
3015     # make sure we see a stable and not transient situation; therefore
3016     # we force restart of the loop
3017     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3018       logging.info("Degraded disks found, %d retries left", degr_retries)
3019       degr_retries -= 1
3020       time.sleep(1)
3021       continue
3022
3023     if done or oneshot:
3024       break
3025
3026     time.sleep(min(60, max_time))
3027
3028   if done:
3029     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3030   return not cumul_degraded
3031
3032
3033 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3034   """Check that mirrors are not degraded.
3035
3036   The ldisk parameter, if True, will change the test from the
3037   is_degraded attribute (which represents overall non-ok status for
3038   the device(s)) to the ldisk (representing the local storage status).
3039
3040   """
3041   lu.cfg.SetDiskID(dev, node)
3042
3043   result = True
3044
3045   if on_primary or dev.AssembleOnSecondary():
3046     rstats = lu.rpc.call_blockdev_find(node, dev)
3047     msg = rstats.fail_msg
3048     if msg:
3049       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3050       result = False
3051     elif not rstats.payload:
3052       lu.LogWarning("Can't find disk on node %s", node)
3053       result = False
3054     else:
3055       if ldisk:
3056         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3057       else:
3058         result = result and not rstats.payload.is_degraded
3059
3060   if dev.children:
3061     for child in dev.children:
3062       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3063
3064   return result
3065
3066
3067 class LUDiagnoseOS(NoHooksLU):
3068   """Logical unit for OS diagnose/query.
3069
3070   """
3071   _OP_PARAMS = [
3072     _POutputFields,
3073     ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3074     ]
3075   REQ_BGL = False
3076   _FIELDS_STATIC = utils.FieldSet()
3077   _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
3078                                    "parameters", "api_versions")
3079
3080   def CheckArguments(self):
3081     if self.op.names:
3082       raise errors.OpPrereqError("Selective OS query not supported",
3083                                  errors.ECODE_INVAL)
3084
3085     _CheckOutputFields(static=self._FIELDS_STATIC,
3086                        dynamic=self._FIELDS_DYNAMIC,
3087                        selected=self.op.output_fields)
3088
3089   def ExpandNames(self):
3090     # Lock all nodes, in shared mode
3091     # Temporary removal of locks, should be reverted later
3092     # TODO: reintroduce locks when they are lighter-weight
3093     self.needed_locks = {}
3094     #self.share_locks[locking.LEVEL_NODE] = 1
3095     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3096
3097   @staticmethod
3098   def _DiagnoseByOS(rlist):
3099     """Remaps a per-node return list into an a per-os per-node dictionary
3100
3101     @param rlist: a map with node names as keys and OS objects as values
3102
3103     @rtype: dict
3104     @return: a dictionary with osnames as keys and as value another
3105         map, with nodes as keys and tuples of (path, status, diagnose,
3106         variants, parameters, api_versions) as values, eg::
3107
3108           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3109                                      (/srv/..., False, "invalid api")],
3110                            "node2": [(/srv/..., True, "", [], [])]}
3111           }
3112
3113     """
3114     all_os = {}
3115     # we build here the list of nodes that didn't fail the RPC (at RPC
3116     # level), so that nodes with a non-responding node daemon don't
3117     # make all OSes invalid
3118     good_nodes = [node_name for node_name in rlist
3119                   if not rlist[node_name].fail_msg]
3120     for node_name, nr in rlist.items():
3121       if nr.fail_msg or not nr.payload:
3122         continue
3123       for (name, path, status, diagnose, variants,
3124            params, api_versions) in nr.payload:
3125         if name not in all_os:
3126           # build a list of nodes for this os containing empty lists
3127           # for each node in node_list
3128           all_os[name] = {}
3129           for nname in good_nodes:
3130             all_os[name][nname] = []
3131         # convert params from [name, help] to (name, help)
3132         params = [tuple(v) for v in params]
3133         all_os[name][node_name].append((path, status, diagnose,
3134                                         variants, params, api_versions))
3135     return all_os
3136
3137   def Exec(self, feedback_fn):
3138     """Compute the list of OSes.
3139
3140     """
3141     valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3142     node_data = self.rpc.call_os_diagnose(valid_nodes)
3143     pol = self._DiagnoseByOS(node_data)
3144     output = []
3145
3146     for os_name, os_data in pol.items():
3147       row = []
3148       valid = True
3149       (variants, params, api_versions) = null_state = (set(), set(), set())
3150       for idx, osl in enumerate(os_data.values()):
3151         valid = bool(valid and osl and osl[0][1])
3152         if not valid:
3153           (variants, params, api_versions) = null_state
3154           break
3155         node_variants, node_params, node_api = osl[0][3:6]
3156         if idx == 0: # first entry
3157           variants = set(node_variants)
3158           params = set(node_params)
3159           api_versions = set(node_api)
3160         else: # keep consistency
3161           variants.intersection_update(node_variants)
3162           params.intersection_update(node_params)
3163           api_versions.intersection_update(node_api)
3164
3165       for field in self.op.output_fields:
3166         if field == "name":
3167           val = os_name
3168         elif field == "valid":
3169           val = valid
3170         elif field == "node_status":
3171           # this is just a copy of the dict
3172           val = {}
3173           for node_name, nos_list in os_data.items():
3174             val[node_name] = nos_list
3175         elif field == "variants":
3176           val = list(variants)
3177         elif field == "parameters":
3178           val = list(params)
3179         elif field == "api_versions":
3180           val = list(api_versions)
3181         else:
3182           raise errors.ParameterError(field)
3183         row.append(val)
3184       output.append(row)
3185
3186     return output
3187
3188
3189 class LURemoveNode(LogicalUnit):
3190   """Logical unit for removing a node.
3191
3192   """
3193   HPATH = "node-remove"
3194   HTYPE = constants.HTYPE_NODE
3195   _OP_PARAMS = [
3196     _PNodeName,
3197     ]
3198
3199   def BuildHooksEnv(self):
3200     """Build hooks env.
3201
3202     This doesn't run on the target node in the pre phase as a failed
3203     node would then be impossible to remove.
3204
3205     """
3206     env = {
3207       "OP_TARGET": self.op.node_name,
3208       "NODE_NAME": self.op.node_name,
3209       }
3210     all_nodes = self.cfg.GetNodeList()
3211     try:
3212       all_nodes.remove(self.op.node_name)
3213     except ValueError:
3214       logging.warning("Node %s which is about to be removed not found"
3215                       " in the all nodes list", self.op.node_name)
3216     return env, all_nodes, all_nodes
3217
3218   def CheckPrereq(self):
3219     """Check prerequisites.
3220
3221     This checks:
3222      - the node exists in the configuration
3223      - it does not have primary or secondary instances
3224      - it's not the master
3225
3226     Any errors are signaled by raising errors.OpPrereqError.
3227
3228     """
3229     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3230     node = self.cfg.GetNodeInfo(self.op.node_name)
3231     assert node is not None
3232
3233     instance_list = self.cfg.GetInstanceList()
3234
3235     masternode = self.cfg.GetMasterNode()
3236     if node.name == masternode:
3237       raise errors.OpPrereqError("Node is the master node,"
3238                                  " you need to failover first.",
3239                                  errors.ECODE_INVAL)
3240
3241     for instance_name in instance_list:
3242       instance = self.cfg.GetInstanceInfo(instance_name)
3243       if node.name in instance.all_nodes:
3244         raise errors.OpPrereqError("Instance %s is still running on the node,"
3245                                    " please remove first." % instance_name,
3246                                    errors.ECODE_INVAL)
3247     self.op.node_name = node.name
3248     self.node = node
3249
3250   def Exec(self, feedback_fn):
3251     """Removes the node from the cluster.
3252
3253     """
3254     node = self.node
3255     logging.info("Stopping the node daemon and removing configs from node %s",
3256                  node.name)
3257
3258     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3259
3260     # Promote nodes to master candidate as needed
3261     _AdjustCandidatePool(self, exceptions=[node.name])
3262     self.context.RemoveNode(node.name)
3263
3264     # Run post hooks on the node before it's removed
3265     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3266     try:
3267       hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3268     except:
3269       # pylint: disable-msg=W0702
3270       self.LogWarning("Errors occurred running hooks on %s" % node.name)
3271
3272     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3273     msg = result.fail_msg
3274     if msg:
3275       self.LogWarning("Errors encountered on the remote node while leaving"
3276                       " the cluster: %s", msg)
3277
3278     # Remove node from our /etc/hosts
3279     if self.cfg.GetClusterInfo().modify_etc_hosts:
3280       # FIXME: this should be done via an rpc call to node daemon
3281       utils.RemoveHostFromEtcHosts(node.name)
3282       _RedistributeAncillaryFiles(self)
3283
3284
3285 class LUQueryNodes(NoHooksLU):
3286   """Logical unit for querying nodes.
3287
3288   """
3289   # pylint: disable-msg=W0142
3290   _OP_PARAMS = [
3291     _POutputFields,
3292     ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3293     ("use_locking", False, _TBool),
3294     ]
3295   REQ_BGL = False
3296
3297   _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3298                     "master_candidate", "offline", "drained"]
3299
3300   _FIELDS_DYNAMIC = utils.FieldSet(
3301     "dtotal", "dfree",
3302     "mtotal", "mnode", "mfree",
3303     "bootid",
3304     "ctotal", "cnodes", "csockets",
3305     )
3306
3307   _FIELDS_STATIC = utils.FieldSet(*[
3308     "pinst_cnt", "sinst_cnt",
3309     "pinst_list", "sinst_list",
3310     "pip", "sip", "tags",
3311     "master",
3312     "role"] + _SIMPLE_FIELDS
3313     )
3314
3315   def CheckArguments(self):
3316     _CheckOutputFields(static=self._FIELDS_STATIC,
3317                        dynamic=self._FIELDS_DYNAMIC,
3318                        selected=self.op.output_fields)
3319
3320   def ExpandNames(self):
3321     self.needed_locks = {}
3322     self.share_locks[locking.LEVEL_NODE] = 1
3323
3324     if self.op.names:
3325       self.wanted = _GetWantedNodes(self, self.op.names)
3326     else:
3327       self.wanted = locking.ALL_SET
3328
3329     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3330     self.do_locking = self.do_node_query and self.op.use_locking
3331     if self.do_locking:
3332       # if we don't request only static fields, we need to lock the nodes
3333       self.needed_locks[locking.LEVEL_NODE] = self.wanted
3334
3335   def Exec(self, feedback_fn):
3336     """Computes the list of nodes and their attributes.
3337
3338     """
3339     all_info = self.cfg.GetAllNodesInfo()
3340     if self.do_locking:
3341       nodenames = self.acquired_locks[locking.LEVEL_NODE]
3342     elif self.wanted != locking.ALL_SET:
3343       nodenames = self.wanted
3344       missing = set(nodenames).difference(all_info.keys())
3345       if missing:
3346         raise errors.OpExecError(
3347           "Some nodes were removed before retrieving their data: %s" % missing)
3348     else:
3349       nodenames = all_info.keys()
3350
3351     nodenames = utils.NiceSort(nodenames)
3352     nodelist = [all_info[name] for name in nodenames]
3353
3354     # begin data gathering
3355
3356     if self.do_node_query:
3357       live_data = {}
3358       node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3359                                           self.cfg.GetHypervisorType())
3360       for name in nodenames:
3361         nodeinfo = node_data[name]
3362         if not nodeinfo.fail_msg and nodeinfo.payload:
3363           nodeinfo = nodeinfo.payload
3364           fn = utils.TryConvert
3365           live_data[name] = {
3366             "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3367             "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3368             "mfree": fn(int, nodeinfo.get('memory_free', None)),
3369             "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3370             "dfree": fn(int, nodeinfo.get('vg_free', None)),
3371             "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3372             "bootid": nodeinfo.get('bootid', None),
3373             "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3374             "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3375             }
3376         else:
3377           live_data[name] = {}
3378     else:
3379       live_data = dict.fromkeys(nodenames, {})
3380
3381     node_to_primary = dict([(name, set()) for name in nodenames])
3382     node_to_secondary = dict([(name, set()) for name in nodenames])
3383
3384     inst_fields = frozenset(("pinst_cnt", "pinst_list",
3385                              "sinst_cnt", "sinst_list"))
3386     if inst_fields & frozenset(self.op.output_fields):
3387       inst_data = self.cfg.GetAllInstancesInfo()
3388
3389       for inst in inst_data.values():
3390         if inst.primary_node in node_to_primary:
3391           node_to_primary[inst.primary_node].add(inst.name)
3392         for secnode in inst.secondary_nodes:
3393           if secnode in node_to_secondary:
3394             node_to_secondary[secnode].add(inst.name)
3395
3396     master_node = self.cfg.GetMasterNode()
3397
3398     # end data gathering
3399
3400     output = []
3401     for node in nodelist:
3402       node_output = []
3403       for field in self.op.output_fields:
3404         if field in self._SIMPLE_FIELDS:
3405           val = getattr(node, field)
3406         elif field == "pinst_list":
3407           val = list(node_to_primary[node.name])
3408         elif field == "sinst_list":
3409           val = list(node_to_secondary[node.name])
3410         elif field == "pinst_cnt":
3411           val = len(node_to_primary[node.name])
3412         elif field == "sinst_cnt":
3413           val = len(node_to_secondary[node.name])
3414         elif field == "pip":
3415           val = node.primary_ip
3416         elif field == "sip":
3417           val = node.secondary_ip
3418         elif field == "tags":
3419           val = list(node.GetTags())
3420         elif field == "master":
3421           val = node.name == master_node
3422         elif self._FIELDS_DYNAMIC.Matches(field):
3423           val = live_data[node.name].get(field, None)
3424         elif field == "role":
3425           if node.name == master_node:
3426             val = "M"
3427           elif node.master_candidate:
3428             val = "C"
3429           elif node.drained:
3430             val = "D"
3431           elif node.offline:
3432             val = "O"
3433           else:
3434             val = "R"
3435         else:
3436           raise errors.ParameterError(field)
3437         node_output.append(val)
3438       output.append(node_output)
3439
3440     return output
3441
3442
3443 class LUQueryNodeVolumes(NoHooksLU):
3444   """Logical unit for getting volumes on node(s).
3445
3446   """
3447   _OP_PARAMS = [
3448     ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3449     ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3450     ]
3451   REQ_BGL = False
3452   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3453   _FIELDS_STATIC = utils.FieldSet("node")
3454
3455   def CheckArguments(self):
3456     _CheckOutputFields(static=self._FIELDS_STATIC,
3457                        dynamic=self._FIELDS_DYNAMIC,
3458                        selected=self.op.output_fields)
3459
3460   def ExpandNames(self):
3461     self.needed_locks = {}
3462     self.share_locks[locking.LEVEL_NODE] = 1
3463     if not self.op.nodes:
3464       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3465     else:
3466       self.needed_locks[locking.LEVEL_NODE] = \
3467         _GetWantedNodes(self, self.op.nodes)
3468
3469   def Exec(self, feedback_fn):
3470     """Computes the list of nodes and their attributes.
3471
3472     """
3473     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3474     volumes = self.rpc.call_node_volumes(nodenames)
3475
3476     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3477              in self.cfg.GetInstanceList()]
3478
3479     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3480
3481     output = []
3482     for node in nodenames:
3483       nresult = volumes[node]
3484       if nresult.offline:
3485         continue
3486       msg = nresult.fail_msg
3487       if msg:
3488         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3489         continue
3490
3491       node_vols = nresult.payload[:]
3492       node_vols.sort(key=lambda vol: vol['dev'])
3493
3494       for vol in node_vols:
3495         node_output = []
3496         for field in self.op.output_fields:
3497           if field == "node":
3498             val = node
3499           elif field == "phys":
3500             val = vol['dev']
3501           elif field == "vg":
3502             val = vol['vg']
3503           elif field == "name":
3504             val = vol['name']
3505           elif field == "size":
3506             val = int(float(vol['size']))
3507           elif field == "instance":
3508             for inst in ilist:
3509               if node not in lv_by_node[inst]:
3510                 continue
3511               if vol['name'] in lv_by_node[inst][node]:
3512                 val = inst.name
3513                 break
3514             else:
3515               val = '-'
3516           else:
3517             raise errors.ParameterError(field)
3518           node_output.append(str(val))
3519
3520         output.append(node_output)
3521
3522     return output
3523
3524
3525 class LUQueryNodeStorage(NoHooksLU):
3526   """Logical unit for getting information on storage units on node(s).
3527
3528   """
3529   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3530   _OP_PARAMS = [
3531     ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3532     ("storage_type", _NoDefault, _CheckStorageType),
3533     ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3534     ("name", None, _TMaybeString),
3535     ]
3536   REQ_BGL = False
3537
3538   def CheckArguments(self):
3539     _CheckOutputFields(static=self._FIELDS_STATIC,
3540                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3541                        selected=self.op.output_fields)
3542
3543   def ExpandNames(self):
3544     self.needed_locks = {}
3545     self.share_locks[locking.LEVEL_NODE] = 1
3546
3547     if self.op.nodes:
3548       self.needed_locks[locking.LEVEL_NODE] = \
3549         _GetWantedNodes(self, self.op.nodes)
3550     else:
3551       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3552
3553   def Exec(self, feedback_fn):
3554     """Computes the list of nodes and their attributes.
3555
3556     """
3557     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3558
3559     # Always get name to sort by
3560     if constants.SF_NAME in self.op.output_fields:
3561       fields = self.op.output_fields[:]
3562     else:
3563       fields = [constants.SF_NAME] + self.op.output_fields
3564
3565     # Never ask for node or type as it's only known to the LU
3566     for extra in [constants.SF_NODE, constants.SF_TYPE]:
3567       while extra in fields:
3568         fields.remove(extra)
3569
3570     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3571     name_idx = field_idx[constants.SF_NAME]
3572
3573     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3574     data = self.rpc.call_storage_list(self.nodes,
3575                                       self.op.storage_type, st_args,
3576                                       self.op.name, fields)
3577
3578     result = []
3579
3580     for node in utils.NiceSort(self.nodes):
3581       nresult = data[node]
3582       if nresult.offline:
3583         continue
3584
3585       msg = nresult.fail_msg
3586       if msg:
3587         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3588         continue
3589
3590       rows = dict([(row[name_idx], row) for row in nresult.payload])
3591
3592       for name in utils.NiceSort(rows.keys()):
3593         row = rows[name]
3594
3595         out = []
3596
3597         for field in self.op.output_fields:
3598           if field == constants.SF_NODE:
3599             val = node
3600           elif field == constants.SF_TYPE:
3601             val = self.op.storage_type
3602           elif field in field_idx:
3603             val = row[field_idx[field]]
3604           else:
3605             raise errors.ParameterError(field)
3606
3607           out.append(val)
3608
3609         result.append(out)
3610
3611     return result
3612
3613
3614 class LUModifyNodeStorage(NoHooksLU):
3615   """Logical unit for modifying a storage volume on a node.
3616
3617   """
3618   _OP_PARAMS = [
3619     _PNodeName,
3620     ("storage_type", _NoDefault, _CheckStorageType),
3621     ("name", _NoDefault, _TNonEmptyString),
3622     ("changes", _NoDefault, _TDict),
3623     ]
3624   REQ_BGL = False
3625
3626   def CheckArguments(self):
3627     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3628
3629     storage_type = self.op.storage_type
3630
3631     try:
3632       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3633     except KeyError:
3634       raise errors.OpPrereqError("Storage units of type '%s' can not be"
3635                                  " modified" % storage_type,
3636                                  errors.ECODE_INVAL)
3637
3638     diff = set(self.op.changes.keys()) - modifiable
3639     if diff:
3640       raise errors.OpPrereqError("The following fields can not be modified for"
3641                                  " storage units of type '%s': %r" %
3642                                  (storage_type, list(diff)),
3643                                  errors.ECODE_INVAL)
3644
3645   def ExpandNames(self):
3646     self.needed_locks = {
3647       locking.LEVEL_NODE: self.op.node_name,
3648       }
3649
3650   def Exec(self, feedback_fn):
3651     """Computes the list of nodes and their attributes.
3652
3653     """
3654     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3655     result = self.rpc.call_storage_modify(self.op.node_name,
3656                                           self.op.storage_type, st_args,
3657                                           self.op.name, self.op.changes)
3658     result.Raise("Failed to modify storage unit '%s' on %s" %
3659                  (self.op.name, self.op.node_name))
3660
3661
3662 class LUAddNode(LogicalUnit):
3663   """Logical unit for adding node to the cluster.
3664
3665   """
3666   HPATH = "node-add"
3667   HTYPE = constants.HTYPE_NODE
3668   _OP_PARAMS = [
3669     _PNodeName,
3670     ("primary_ip", None, _NoType),
3671     ("secondary_ip", None, _TMaybeString),
3672     ("readd", False, _TBool),
3673     ]
3674
3675   def CheckArguments(self):
3676     # validate/normalize the node name
3677     self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)
3678
3679   def BuildHooksEnv(self):
3680     """Build hooks env.
3681
3682     This will run on all nodes before, and on all nodes + the new node after.
3683
3684     """
3685     env = {
3686       "OP_TARGET": self.op.node_name,
3687       "NODE_NAME": self.op.node_name,
3688       "NODE_PIP": self.op.primary_ip,
3689       "NODE_SIP": self.op.secondary_ip,
3690       }
3691     nodes_0 = self.cfg.GetNodeList()
3692     nodes_1 = nodes_0 + [self.op.node_name, ]
3693     return env, nodes_0, nodes_1
3694
3695   def CheckPrereq(self):
3696     """Check prerequisites.
3697
3698     This checks:
3699      - the new node is not already in the config
3700      - it is resolvable
3701      - its parameters (single/dual homed) matches the cluster
3702
3703     Any errors are signaled by raising errors.OpPrereqError.
3704
3705     """
3706     node_name = self.op.node_name
3707     cfg = self.cfg
3708
3709     dns_data = netutils.GetHostInfo(node_name)
3710
3711     node = dns_data.name
3712     primary_ip = self.op.primary_ip = dns_data.ip
3713     if self.op.secondary_ip is None:
3714       self.op.secondary_ip = primary_ip
3715     if not netutils.IP4Address.IsValid(self.op.secondary_ip):
3716       raise errors.OpPrereqError("Invalid secondary IP given",
3717                                  errors.ECODE_INVAL)
3718     secondary_ip = self.op.secondary_ip
3719
3720     node_list = cfg.GetNodeList()
3721     if not self.op.readd and node in node_list:
3722       raise errors.OpPrereqError("Node %s is already in the configuration" %
3723                                  node, errors.ECODE_EXISTS)
3724     elif self.op.readd and node not in node_list:
3725       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3726                                  errors.ECODE_NOENT)
3727
3728     self.changed_primary_ip = False
3729
3730     for existing_node_name in node_list:
3731       existing_node = cfg.GetNodeInfo(existing_node_name)
3732
3733       if self.op.readd and node == existing_node_name:
3734         if existing_node.secondary_ip != secondary_ip:
3735           raise errors.OpPrereqError("Readded node doesn't have the same IP"
3736                                      " address configuration as before",
3737                                      errors.ECODE_INVAL)
3738         if existing_node.primary_ip != primary_ip:
3739           self.changed_primary_ip = True
3740
3741         continue
3742
3743       if (existing_node.primary_ip == primary_ip or
3744           existing_node.secondary_ip == primary_ip or
3745           existing_node.primary_ip == secondary_ip or
3746           existing_node.secondary_ip == secondary_ip):
3747         raise errors.OpPrereqError("New node ip address(es) conflict with"
3748                                    " existing node %s" % existing_node.name,
3749                                    errors.ECODE_NOTUNIQUE)
3750
3751     # check that the type of the node (single versus dual homed) is the
3752     # same as for the master
3753     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3754     master_singlehomed = myself.secondary_ip == myself.primary_ip
3755     newbie_singlehomed = secondary_ip == primary_ip
3756     if master_singlehomed != newbie_singlehomed:
3757       if master_singlehomed:
3758         raise errors.OpPrereqError("The master has no private ip but the"
3759                                    " new node has one",
3760                                    errors.ECODE_INVAL)
3761       else:
3762         raise errors.OpPrereqError("The master has a private ip but the"
3763                                    " new node doesn't have one",
3764                                    errors.ECODE_INVAL)
3765
3766     # checks reachability
3767     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3768       raise errors.OpPrereqError("Node not reachable by ping",
3769                                  errors.ECODE_ENVIRON)
3770
3771     if not newbie_singlehomed:
3772       # check reachability from my secondary ip to newbie's secondary ip
3773       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3774                            source=myself.secondary_ip):
3775         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3776                                    " based ping to noded port",
3777                                    errors.ECODE_ENVIRON)
3778
3779     if self.op.readd:
3780       exceptions = [node]
3781     else:
3782       exceptions = []
3783
3784     self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3785
3786     if self.op.readd:
3787       self.new_node = self.cfg.GetNodeInfo(node)
3788       assert self.new_node is not None, "Can't retrieve locked node %s" % node
3789     else:
3790       self.new_node = objects.Node(name=node,
3791                                    primary_ip=primary_ip,
3792                                    secondary_ip=secondary_ip,
3793                                    master_candidate=self.master_candidate,
3794                                    offline=False, drained=False)
3795
3796   def Exec(self, feedback_fn):
3797     """Adds the new node to the cluster.
3798
3799     """
3800     new_node = self.new_node
3801     node = new_node.name
3802
3803     # for re-adds, reset the offline/drained/master-candidate flags;
3804     # we need to reset here, otherwise offline would prevent RPC calls
3805     # later in the procedure; this also means that if the re-add
3806     # fails, we are left with a non-offlined, broken node
3807     if self.op.readd:
3808       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3809       self.LogInfo("Readding a node, the offline/drained flags were reset")
3810       # if we demote the node, we do cleanup later in the procedure
3811       new_node.master_candidate = self.master_candidate
3812       if self.changed_primary_ip:
3813         new_node.primary_ip = self.op.primary_ip
3814
3815     # notify the user about any possible mc promotion
3816     if new_node.master_candidate:
3817       self.LogInfo("Node will be a master candidate")
3818
3819     # check connectivity
3820     result = self.rpc.call_version([node])[node]
3821     result.Raise("Can't get version information from node %s" % node)
3822     if constants.PROTOCOL_VERSION == result.payload:
3823       logging.info("Communication to node %s fine, sw version %s match",
3824                    node, result.payload)
3825     else:
3826       raise errors.OpExecError("Version mismatch master version %s,"
3827                                " node version %s" %
3828                                (constants.PROTOCOL_VERSION, result.payload))
3829
3830     # setup ssh on node
3831     if self.cfg.GetClusterInfo().modify_ssh_setup:
3832       logging.info("Copy ssh key to node %s", node)
3833       priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3834       keyarray = []
3835       keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3836                   constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3837                   priv_key, pub_key]
3838
3839       for i in keyfiles:
3840         keyarray.append(utils.ReadFile(i))
3841
3842       result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3843                                       keyarray[2], keyarray[3], keyarray[4],
3844                                       keyarray[5])
3845       result.Raise("Cannot transfer ssh keys to the new node")
3846
3847     # Add node to our /etc/hosts, and add key to known_hosts
3848     if self.cfg.GetClusterInfo().modify_etc_hosts:
3849       # FIXME: this should be done via an rpc call to node daemon
3850       utils.AddHostToEtcHosts(new_node.name)
3851
3852     if new_node.secondary_ip != new_node.primary_ip:
3853       result = self.rpc.call_node_has_ip_address(new_node.name,
3854                                                  new_node.secondary_ip)
3855       result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3856                    prereq=True, ecode=errors.ECODE_ENVIRON)
3857       if not result.payload:
3858         raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3859                                  " you gave (%s). Please fix and re-run this"
3860                                  " command." % new_node.secondary_ip)
3861
3862     node_verify_list = [self.cfg.GetMasterNode()]
3863     node_verify_param = {
3864       constants.NV_NODELIST: [node],
3865       # TODO: do a node-net-test as well?
3866     }
3867
3868     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3869                                        self.cfg.GetClusterName())
3870     for verifier in node_verify_list:
3871       result[verifier].Raise("Cannot communicate with node %s" % verifier)
3872       nl_payload = result[verifier].payload[constants.NV_NODELIST]
3873       if nl_payload:
3874         for failed in nl_payload:
3875           feedback_fn("ssh/hostname verification failed"
3876                       " (checking from %s): %s" %
3877                       (verifier, nl_payload[failed]))
3878         raise errors.OpExecError("ssh/hostname verification failed.")
3879
3880     if self.op.readd:
3881       _RedistributeAncillaryFiles(self)
3882       self.context.ReaddNode(new_node)
3883       # make sure we redistribute the config
3884       self.cfg.Update(new_node, feedback_fn)
3885       # and make sure the new node will not have old files around
3886       if not new_node.master_candidate:
3887         result = self.rpc.call_node_demote_from_mc(new_node.name)
3888         msg = result.fail_msg
3889         if msg:
3890           self.LogWarning("Node failed to demote itself from master"
3891                           " candidate status: %s" % msg)
3892     else:
3893       _RedistributeAncillaryFiles(self, additional_nodes=[node])
3894       self.context.AddNode(new_node, self.proc.GetECId())
3895
3896
3897 class LUSetNodeParams(LogicalUnit):
3898   """Modifies the parameters of a node.
3899
3900   """
3901   HPATH = "node-modify"
3902   HTYPE = constants.HTYPE_NODE
3903   _OP_PARAMS = [
3904     _PNodeName,
3905     ("master_candidate", None, _TMaybeBool),
3906     ("offline", None, _TMaybeBool),
3907     ("drained", None, _TMaybeBool),
3908     ("auto_promote", False, _TBool),
3909     _PForce,
3910     ]
3911   REQ_BGL = False
3912
3913   def CheckArguments(self):
3914     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3915     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3916     if all_mods.count(None) == 3:
3917       raise errors.OpPrereqError("Please pass at least one modification",
3918                                  errors.ECODE_INVAL)
3919     if all_mods.count(True) > 1:
3920       raise errors.OpPrereqError("Can't set the node into more than one"
3921                                  " state at the same time",
3922                                  errors.ECODE_INVAL)
3923
3924     # Boolean value that tells us whether we're offlining or draining the node
3925     self.offline_or_drain = (self.op.offline == True or
3926                              self.op.drained == True)
3927     self.deoffline_or_drain = (self.op.offline == False or
3928                                self.op.drained == False)
3929     self.might_demote = (self.op.master_candidate == False or
3930                          self.offline_or_drain)
3931
3932     self.lock_all = self.op.auto_promote and self.might_demote
3933
3934
3935   def ExpandNames(self):
3936     if self.lock_all:
3937       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3938     else:
3939       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3940
3941   def BuildHooksEnv(self):
3942     """Build hooks env.
3943
3944     This runs on the master node.
3945
3946     """
3947     env = {
3948       "OP_TARGET": self.op.node_name,
3949       "MASTER_CANDIDATE": str(self.op.master_candidate),
3950       "OFFLINE": str(self.op.offline),
3951       "DRAINED": str(self.op.drained),
3952       }
3953     nl = [self.cfg.GetMasterNode(),
3954           self.op.node_name]
3955     return env, nl, nl
3956
3957   def CheckPrereq(self):
3958     """Check prerequisites.
3959
3960     This only checks the instance list against the existing names.
3961
3962     """
3963     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3964
3965     if (self.op.master_candidate is not None or
3966         self.op.drained is not None or
3967         self.op.offline is not None):
3968       # we can't change the master's node flags
3969       if self.op.node_name == self.cfg.GetMasterNode():
3970         raise errors.OpPrereqError("The master role can be changed"
3971                                    " only via master-failover",
3972                                    errors.ECODE_INVAL)
3973
3974
3975     if node.master_candidate and self.might_demote and not self.lock_all:
3976       assert not self.op.auto_promote, "auto-promote set but lock_all not"
3977       # check if after removing the current node, we're missing master
3978       # candidates
3979       (mc_remaining, mc_should, _) = \
3980           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3981       if mc_remaining < mc_should:
3982         raise errors.OpPrereqError("Not enough master candidates, please"
3983                                    " pass auto_promote to allow promotion",
3984                                    errors.ECODE_INVAL)
3985
3986     if (self.op.master_candidate == True and
3987         ((node.offline and not self.op.offline == False) or
3988          (node.drained and not self.op.drained == False))):
3989       raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3990                                  " to master_candidate" % node.name,
3991                                  errors.ECODE_INVAL)
3992
3993     # If we're being deofflined/drained, we'll MC ourself if needed
3994     if (self.deoffline_or_drain and not self.offline_or_drain and not
3995         self.op.master_candidate == True and not node.master_candidate):
3996       self.op.master_candidate = _DecideSelfPromotion(self)
3997       if self.op.master_candidate:
3998         self.LogInfo("Autopromoting node to master candidate")
3999
4000     return
4001
4002   def Exec(self, feedback_fn):
4003     """Modifies a node.
4004
4005     """
4006     node = self.node
4007
4008     result = []
4009     changed_mc = False
4010
4011     if self.op.offline is not None:
4012       node.offline = self.op.offline
4013       result.append(("offline", str(self.op.offline)))
4014       if self.op.offline == True:
4015         if node.master_candidate:
4016           node.master_candidate = False
4017           changed_mc = True
4018           result.append(("master_candidate", "auto-demotion due to offline"))
4019         if node.drained:
4020           node.drained = False
4021           result.append(("drained", "clear drained status due to offline"))
4022
4023     if self.op.master_candidate is not None:
4024       node.master_candidate = self.op.master_candidate
4025       changed_mc = True
4026       result.append(("master_candidate", str(self.op.master_candidate)))
4027       if self.op.master_candidate == False:
4028         rrc = self.rpc.call_node_demote_from_mc(node.name)
4029         msg = rrc.fail_msg
4030         if msg:
4031           self.LogWarning("Node failed to demote itself: %s" % msg)
4032
4033     if self.op.drained is not None:
4034       node.drained = self.op.drained
4035       result.append(("drained", str(self.op.drained)))
4036       if self.op.drained == True:
4037         if node.master_candidate:
4038           node.master_candidate = False
4039           changed_mc = True
4040           result.append(("master_candidate", "auto-demotion due to drain"))
4041           rrc = self.rpc.call_node_demote_from_mc(node.name)
4042           msg = rrc.fail_msg
4043           if msg:
4044             self.LogWarning("Node failed to demote itself: %s" % msg)
4045         if node.offline:
4046           node.offline = False
4047           result.append(("offline", "clear offline status due to drain"))
4048
4049     # we locked all nodes, we adjust the CP before updating this node
4050     if self.lock_all:
4051       _AdjustCandidatePool(self, [node.name])
4052
4053     # this will trigger configuration file update, if needed
4054     self.cfg.Update(node, feedback_fn)
4055
4056     # this will trigger job queue propagation or cleanup
4057     if changed_mc:
4058       self.context.ReaddNode(node)
4059
4060     return result
4061
4062
4063 class LUPowercycleNode(NoHooksLU):
4064   """Powercycles a node.
4065
4066   """
4067   _OP_PARAMS = [
4068     _PNodeName,
4069     _PForce,
4070     ]
4071   REQ_BGL = False
4072
4073   def CheckArguments(self):
4074     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4075     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4076       raise errors.OpPrereqError("The node is the master and the force"
4077                                  " parameter was not set",
4078                                  errors.ECODE_INVAL)
4079
4080   def ExpandNames(self):
4081     """Locking for PowercycleNode.
4082
4083     This is a last-resort option and shouldn't block on other
4084     jobs. Therefore, we grab no locks.
4085
4086     """
4087     self.needed_locks = {}
4088
4089   def Exec(self, feedback_fn):
4090     """Reboots a node.
4091
4092     """
4093     result = self.rpc.call_node_powercycle(self.op.node_name,
4094                                            self.cfg.GetHypervisorType())
4095     result.Raise("Failed to schedule the reboot")
4096     return result.payload
4097
4098
4099 class LUQueryClusterInfo(NoHooksLU):
4100   """Query cluster configuration.
4101
4102   """
4103   REQ_BGL = False
4104
4105   def ExpandNames(self):
4106     self.needed_locks = {}
4107
4108   def Exec(self, feedback_fn):
4109     """Return cluster config.
4110
4111     """
4112     cluster = self.cfg.GetClusterInfo()
4113     os_hvp = {}
4114
4115     # Filter just for enabled hypervisors
4116     for os_name, hv_dict in cluster.os_hvp.items():
4117       os_hvp[os_name] = {}
4118       for hv_name, hv_params in hv_dict.items():
4119         if hv_name in cluster.enabled_hypervisors:
4120           os_hvp[os_name][hv_name] = hv_params
4121
4122     result = {
4123       "software_version": constants.RELEASE_VERSION,
4124       "protocol_version": constants.PROTOCOL_VERSION,
4125       "config_version": constants.CONFIG_VERSION,
4126       "os_api_version": max(constants.OS_API_VERSIONS),
4127       "export_version": constants.EXPORT_VERSION,
4128       "architecture": (platform.architecture()[0], platform.machine()),
4129       "name": cluster.cluster_name,
4130       "master": cluster.master_node,
4131       "default_hypervisor": cluster.enabled_hypervisors[0],
4132       "enabled_hypervisors": cluster.enabled_hypervisors,
4133       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4134                         for hypervisor_name in cluster.enabled_hypervisors]),
4135       "os_hvp": os_hvp,
4136       "beparams": cluster.beparams,
4137       "osparams": cluster.osparams,
4138       "nicparams": cluster.nicparams,
4139       "candidate_pool_size": cluster.candidate_pool_size,
4140       "master_netdev": cluster.master_netdev,
4141       "volume_group_name": cluster.volume_group_name,
4142       "drbd_usermode_helper": cluster.drbd_usermode_helper,
4143       "file_storage_dir": cluster.file_storage_dir,
4144       "maintain_node_health": cluster.maintain_node_health,
4145       "ctime": cluster.ctime,
4146       "mtime": cluster.mtime,
4147       "uuid": cluster.uuid,
4148       "tags": list(cluster.GetTags()),
4149       "uid_pool": cluster.uid_pool,
4150       "default_iallocator": cluster.default_iallocator,
4151       "reserved_lvs": cluster.reserved_lvs,
4152       }
4153
4154     return result
4155
4156
4157 class LUQueryConfigValues(NoHooksLU):
4158   """Return configuration values.
4159
4160   """
4161   _OP_PARAMS = [_POutputFields]
4162   REQ_BGL = False
4163   _FIELDS_DYNAMIC = utils.FieldSet()
4164   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4165                                   "watcher_pause")
4166
4167   def CheckArguments(self):
4168     _CheckOutputFields(static=self._FIELDS_STATIC,
4169                        dynamic=self._FIELDS_DYNAMIC,
4170                        selected=self.op.output_fields)
4171
4172   def ExpandNames(self):
4173     self.needed_locks = {}
4174
4175   def Exec(self, feedback_fn):
4176     """Dump a representation of the cluster config to the standard output.
4177
4178     """
4179     values = []
4180     for field in self.op.output_fields:
4181       if field == "cluster_name":
4182         entry = self.cfg.GetClusterName()
4183       elif field == "master_node":
4184         entry = self.cfg.GetMasterNode()
4185       elif field == "drain_flag":
4186         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4187       elif field == "watcher_pause":
4188         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4189       else:
4190         raise errors.ParameterError(field)
4191       values.append(entry)
4192     return values
4193
4194
4195 class LUActivateInstanceDisks(NoHooksLU):
4196   """Bring up an instance's disks.
4197
4198   """
4199   _OP_PARAMS = [
4200     _PInstanceName,
4201     ("ignore_size", False, _TBool),
4202     ]
4203   REQ_BGL = False
4204
4205   def ExpandNames(self):
4206     self._ExpandAndLockInstance()
4207     self.needed_locks[locking.LEVEL_NODE] = []
4208     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4209
4210   def DeclareLocks(self, level):
4211     if level == locking.LEVEL_NODE:
4212       self._LockInstancesNodes()
4213
4214   def CheckPrereq(self):
4215     """Check prerequisites.
4216
4217     This checks that the instance is in the cluster.
4218
4219     """
4220     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4221     assert self.instance is not None, \
4222       "Cannot retrieve locked instance %s" % self.op.instance_name
4223     _CheckNodeOnline(self, self.instance.primary_node)
4224
4225   def Exec(self, feedback_fn):
4226     """Activate the disks.
4227
4228     """
4229     disks_ok, disks_info = \
4230               _AssembleInstanceDisks(self, self.instance,
4231                                      ignore_size=self.op.ignore_size)
4232     if not disks_ok:
4233       raise errors.OpExecError("Cannot activate block devices")
4234
4235     return disks_info
4236
4237
4238 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4239                            ignore_size=False):
4240   """Prepare the block devices for an instance.
4241
4242   This sets up the block devices on all nodes.
4243
4244   @type lu: L{LogicalUnit}
4245   @param lu: the logical unit on whose behalf we execute
4246   @type instance: L{objects.Instance}
4247   @param instance: the instance for whose disks we assemble
4248   @type disks: list of L{objects.Disk} or None
4249   @param disks: which disks to assemble (or all, if None)
4250   @type ignore_secondaries: boolean
4251   @param ignore_secondaries: if true, errors on secondary nodes
4252       won't result in an error return from the function
4253   @type ignore_size: boolean
4254   @param ignore_size: if true, the current known size of the disk
4255       will not be used during the disk activation, useful for cases
4256       when the size is wrong
4257   @return: False if the operation failed, otherwise a list of
4258       (host, instance_visible_name, node_visible_name)
4259       with the mapping from node devices to instance devices
4260
4261   """
4262   device_info = []
4263   disks_ok = True
4264   iname = instance.name
4265   disks = _ExpandCheckDisks(instance, disks)
4266
4267   # With the two passes mechanism we try to reduce the window of
4268   # opportunity for the race condition of switching DRBD to primary
4269   # before handshaking occured, but we do not eliminate it
4270
4271   # The proper fix would be to wait (with some limits) until the
4272   # connection has been made and drbd transitions from WFConnection
4273   # into any other network-connected state (Connected, SyncTarget,
4274   # SyncSource, etc.)
4275
4276   # 1st pass, assemble on all nodes in secondary mode
4277   for inst_disk in disks:
4278     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4279       if ignore_size:
4280         node_disk = node_disk.Copy()
4281         node_disk.UnsetSize()
4282       lu.cfg.SetDiskID(node_disk, node)
4283       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4284       msg = result.fail_msg
4285       if msg:
4286         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4287                            " (is_primary=False, pass=1): %s",
4288                            inst_disk.iv_name, node, msg)
4289         if not ignore_secondaries:
4290           disks_ok = False
4291
4292   # FIXME: race condition on drbd migration to primary
4293
4294   # 2nd pass, do only the primary node
4295   for inst_disk in disks:
4296     dev_path = None
4297
4298     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4299       if node != instance.primary_node:
4300         continue
4301       if ignore_size:
4302         node_disk = node_disk.Copy()
4303         node_disk.UnsetSize()
4304       lu.cfg.SetDiskID(node_disk, node)
4305       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4306       msg = result.fail_msg
4307       if msg:
4308         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4309                            " (is_primary=True, pass=2): %s",
4310                            inst_disk.iv_name, node, msg)
4311         disks_ok = False
4312       else:
4313         dev_path = result.payload
4314
4315     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4316
4317   # leave the disks configured for the primary node
4318   # this is a workaround that would be fixed better by
4319   # improving the logical/physical id handling
4320   for disk in disks:
4321     lu.cfg.SetDiskID(disk, instance.primary_node)
4322
4323   return disks_ok, device_info
4324
4325
4326 def _StartInstanceDisks(lu, instance, force):
4327   """Start the disks of an instance.
4328
4329   """
4330   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4331                                            ignore_secondaries=force)
4332   if not disks_ok:
4333     _ShutdownInstanceDisks(lu, instance)
4334     if force is not None and not force:
4335       lu.proc.LogWarning("", hint="If the message above refers to a"
4336                          " secondary node,"
4337                          " you can retry the operation using '--force'.")
4338     raise errors.OpExecError("Disk consistency error")
4339
4340
4341 class LUDeactivateInstanceDisks(NoHooksLU):
4342   """Shutdown an instance's disks.
4343
4344   """
4345   _OP_PARAMS = [
4346     _PInstanceName,
4347     ]
4348   REQ_BGL = False
4349
4350   def ExpandNames(self):
4351     self._ExpandAndLockInstance()
4352     self.needed_locks[locking.LEVEL_NODE] = []
4353     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4354
4355   def DeclareLocks(self, level):
4356     if level == locking.LEVEL_NODE:
4357       self._LockInstancesNodes()
4358
4359   def CheckPrereq(self):
4360     """Check prerequisites.
4361
4362     This checks that the instance is in the cluster.
4363
4364     """
4365     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4366     assert self.instance is not None, \
4367       "Cannot retrieve locked instance %s" % self.op.instance_name
4368
4369   def Exec(self, feedback_fn):
4370     """Deactivate the disks
4371
4372     """
4373     instance = self.instance
4374     _SafeShutdownInstanceDisks(self, instance)
4375
4376
4377 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4378   """Shutdown block devices of an instance.
4379
4380   This function checks if an instance is running, before calling
4381   _ShutdownInstanceDisks.
4382
4383   """
4384   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4385   _ShutdownInstanceDisks(lu, instance, disks=disks)
4386
4387
4388 def _ExpandCheckDisks(instance, disks):
4389   """Return the instance disks selected by the disks list
4390
4391   @type disks: list of L{objects.Disk} or None
4392   @param disks: selected disks
4393   @rtype: list of L{objects.Disk}
4394   @return: selected instance disks to act on
4395
4396   """
4397   if disks is None:
4398     return instance.disks
4399   else:
4400     if not set(disks).issubset(instance.disks):
4401       raise errors.ProgrammerError("Can only act on disks belonging to the"
4402                                    " target instance")
4403     return disks
4404
4405
4406 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4407   """Shutdown block devices of an instance.
4408
4409   This does the shutdown on all nodes of the instance.
4410
4411   If the ignore_primary is false, errors on the primary node are
4412   ignored.
4413
4414   """
4415   all_result = True
4416   disks = _ExpandCheckDisks(instance, disks)
4417
4418   for disk in disks:
4419     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4420       lu.cfg.SetDiskID(top_disk, node)
4421       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4422       msg = result.fail_msg
4423       if msg:
4424         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4425                       disk.iv_name, node, msg)
4426         if not ignore_primary or node != instance.primary_node:
4427           all_result = False
4428   return all_result
4429
4430
4431 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4432   """Checks if a node has enough free memory.
4433
4434   This function check if a given node has the needed amount of free
4435   memory. In case the node has less memory or we cannot get the
4436   information from the node, this function raise an OpPrereqError
4437   exception.
4438
4439   @type lu: C{LogicalUnit}
4440   @param lu: a logical unit from which we get configuration data
4441   @type node: C{str}
4442   @param node: the node to check
4443   @type reason: C{str}
4444   @param reason: string to use in the error message
4445   @type requested: C{int}
4446   @param requested: the amount of memory in MiB to check for
4447   @type hypervisor_name: C{str}
4448   @param hypervisor_name: the hypervisor to ask for memory stats
4449   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4450       we cannot check the node
4451
4452   """
4453   nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4454   nodeinfo[node].Raise("Can't get data from node %s" % node,
4455                        prereq=True, ecode=errors.ECODE_ENVIRON)
4456   free_mem = nodeinfo[node].payload.get('memory_free', None)
4457   if not isinstance(free_mem, int):
4458     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4459                                " was '%s'" % (node, free_mem),
4460                                errors.ECODE_ENVIRON)
4461   if requested > free_mem:
4462     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4463                                " needed %s MiB, available %s MiB" %
4464                                (node, reason, requested, free_mem),
4465                                errors.ECODE_NORES)
4466
4467
4468 def _CheckNodesFreeDisk(lu, nodenames, requested):
4469   """Checks if nodes have enough free disk space in the default VG.
4470
4471   This function check if all given nodes have the needed amount of
4472   free disk. In case any node has less disk or we cannot get the
4473   information from the node, this function raise an OpPrereqError
4474   exception.
4475
4476   @type lu: C{LogicalUnit}
4477   @param lu: a logical unit from which we get configuration data
4478   @type nodenames: C{list}
4479   @param nodenames: the list of node names to check
4480   @type requested: C{int}
4481   @param requested: the amount of disk in MiB to check for
4482   @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4483       we cannot check the node
4484
4485   """
4486   nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4487                                    lu.cfg.GetHypervisorType())
4488   for node in nodenames:
4489     info = nodeinfo[node]
4490     info.Raise("Cannot get current information from node %s" % node,
4491                prereq=True, ecode=errors.ECODE_ENVIRON)
4492     vg_free = info.payload.get("vg_free", None)
4493     if not isinstance(vg_free, int):
4494       raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4495                                  " result was '%s'" % (node, vg_free),
4496                                  errors.ECODE_ENVIRON)
4497     if requested > vg_free:
4498       raise errors.OpPrereqError("Not enough disk space on target node %s:"
4499                                  " required %d MiB, available %d MiB" %
4500                                  (node, requested, vg_free),
4501                                  errors.ECODE_NORES)
4502
4503
4504 class LUStartupInstance(LogicalUnit):
4505   """Starts an instance.
4506
4507   """
4508   HPATH = "instance-start"
4509   HTYPE = constants.HTYPE_INSTANCE
4510   _OP_PARAMS = [
4511     _PInstanceName,
4512     _PForce,
4513     ("hvparams", _EmptyDict, _TDict),
4514     ("beparams", _EmptyDict, _TDict),
4515     ]
4516   REQ_BGL = False
4517
4518   def CheckArguments(self):
4519     # extra beparams
4520     if self.op.beparams:
4521       # fill the beparams dict
4522       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4523
4524   def ExpandNames(self):
4525     self._ExpandAndLockInstance()
4526
4527   def BuildHooksEnv(self):
4528     """Build hooks env.
4529
4530     This runs on master, primary and secondary nodes of the instance.
4531
4532     """
4533     env = {
4534       "FORCE": self.op.force,
4535       }
4536     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4537     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4538     return env, nl, nl
4539
4540   def CheckPrereq(self):
4541     """Check prerequisites.
4542
4543     This checks that the instance is in the cluster.
4544
4545     """
4546     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4547     assert self.instance is not None, \
4548       "Cannot retrieve locked instance %s" % self.op.instance_name
4549
4550     # extra hvparams
4551     if self.op.hvparams:
4552       # check hypervisor parameter syntax (locally)
4553       cluster = self.cfg.GetClusterInfo()
4554       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4555       filled_hvp = cluster.FillHV(instance)
4556       filled_hvp.update(self.op.hvparams)
4557       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4558       hv_type.CheckParameterSyntax(filled_hvp)
4559       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4560
4561     _CheckNodeOnline(self, instance.primary_node)
4562
4563     bep = self.cfg.GetClusterInfo().FillBE(instance)
4564     # check bridges existence
4565     _CheckInstanceBridgesExist(self, instance)
4566
4567     remote_info = self.rpc.call_instance_info(instance.primary_node,
4568                                               instance.name,
4569                                               instance.hypervisor)
4570     remote_info.Raise("Error checking node %s" % instance.primary_node,
4571                       prereq=True, ecode=errors.ECODE_ENVIRON)
4572     if not remote_info.payload: # not running already
4573       _CheckNodeFreeMemory(self, instance.primary_node,
4574                            "starting instance %s" % instance.name,
4575                            bep[constants.BE_MEMORY], instance.hypervisor)
4576
4577   def Exec(self, feedback_fn):
4578     """Start the instance.
4579
4580     """
4581     instance = self.instance
4582     force = self.op.force
4583
4584     self.cfg.MarkInstanceUp(instance.name)
4585
4586     node_current = instance.primary_node
4587
4588     _StartInstanceDisks(self, instance, force)
4589
4590     result = self.rpc.call_instance_start(node_current, instance,
4591                                           self.op.hvparams, self.op.beparams)
4592     msg = result.fail_msg
4593     if msg:
4594       _ShutdownInstanceDisks(self, instance)
4595       raise errors.OpExecError("Could not start instance: %s" % msg)
4596
4597
4598 class LURebootInstance(LogicalUnit):
4599   """Reboot an instance.
4600
4601   """
4602   HPATH = "instance-reboot"
4603   HTYPE = constants.HTYPE_INSTANCE
4604   _OP_PARAMS = [
4605     _PInstanceName,
4606     ("ignore_secondaries", False, _TBool),
4607     ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4608     _PShutdownTimeout,
4609     ]
4610   REQ_BGL = False
4611
4612   def ExpandNames(self):
4613     self._ExpandAndLockInstance()
4614
4615   def BuildHooksEnv(self):
4616     """Build hooks env.
4617
4618     This runs on master, primary and secondary nodes of the instance.
4619
4620     """
4621     env = {
4622       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4623       "REBOOT_TYPE": self.op.reboot_type,
4624       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4625       }
4626     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4627     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4628     return env, nl, nl
4629
4630   def CheckPrereq(self):
4631     """Check prerequisites.
4632
4633     This checks that the instance is in the cluster.
4634
4635     """
4636     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4637     assert self.instance is not None, \
4638       "Cannot retrieve locked instance %s" % self.op.instance_name
4639
4640     _CheckNodeOnline(self, instance.primary_node)
4641
4642     # check bridges existence
4643     _CheckInstanceBridgesExist(self, instance)
4644
4645   def Exec(self, feedback_fn):
4646     """Reboot the instance.
4647
4648     """
4649     instance = self.instance
4650     ignore_secondaries = self.op.ignore_secondaries
4651     reboot_type = self.op.reboot_type
4652
4653     node_current = instance.primary_node
4654
4655     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4656                        constants.INSTANCE_REBOOT_HARD]:
4657       for disk in instance.disks:
4658         self.cfg.SetDiskID(disk, node_current)
4659       result = self.rpc.call_instance_reboot(node_current, instance,
4660                                              reboot_type,
4661                                              self.op.shutdown_timeout)
4662       result.Raise("Could not reboot instance")
4663     else:
4664       result = self.rpc.call_instance_shutdown(node_current, instance,
4665                                                self.op.shutdown_timeout)
4666       result.Raise("Could not shutdown instance for full reboot")
4667       _ShutdownInstanceDisks(self, instance)
4668       _StartInstanceDisks(self, instance, ignore_secondaries)
4669       result = self.rpc.call_instance_start(node_current, instance, None, None)
4670       msg = result.fail_msg
4671       if msg:
4672         _ShutdownInstanceDisks(self, instance)
4673         raise errors.OpExecError("Could not start instance for"
4674                                  " full reboot: %s" % msg)
4675
4676     self.cfg.MarkInstanceUp(instance.name)
4677
4678
4679 class LUShutdownInstance(LogicalUnit):
4680   """Shutdown an instance.
4681
4682   """
4683   HPATH = "instance-stop"
4684   HTYPE = constants.HTYPE_INSTANCE
4685   _OP_PARAMS = [
4686     _PInstanceName,
4687     ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4688     ]
4689   REQ_BGL = False
4690
4691   def ExpandNames(self):
4692     self._ExpandAndLockInstance()
4693
4694   def BuildHooksEnv(self):
4695     """Build hooks env.
4696
4697     This runs on master, primary and secondary nodes of the instance.
4698
4699     """
4700     env = _BuildInstanceHookEnvByObject(self, self.instance)
4701     env["TIMEOUT"] = self.op.timeout
4702     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4703     return env, nl, nl
4704
4705   def CheckPrereq(self):
4706     """Check prerequisites.
4707
4708     This checks that the instance is in the cluster.
4709
4710     """
4711     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4712     assert self.instance is not None, \
4713       "Cannot retrieve locked instance %s" % self.op.instance_name
4714     _CheckNodeOnline(self, self.instance.primary_node)
4715
4716   def Exec(self, feedback_fn):
4717     """Shutdown the instance.
4718
4719     """
4720     instance = self.instance
4721     node_current = instance.primary_node
4722     timeout = self.op.timeout
4723     self.cfg.MarkInstanceDown(instance.name)
4724     result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4725     msg = result.fail_msg
4726     if msg:
4727       self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4728
4729     _ShutdownInstanceDisks(self, instance)
4730
4731
4732 class LUReinstallInstance(LogicalUnit):
4733   """Reinstall an instance.
4734
4735   """
4736   HPATH = "instance-reinstall"
4737   HTYPE = constants.HTYPE_INSTANCE
4738   _OP_PARAMS = [
4739     _PInstanceName,
4740     ("os_type", None, _TMaybeString),
4741     ("force_variant", False, _TBool),
4742     ]
4743   REQ_BGL = False
4744
4745   def ExpandNames(self):
4746     self._ExpandAndLockInstance()
4747
4748   def BuildHooksEnv(self):
4749     """Build hooks env.
4750
4751     This runs on master, primary and secondary nodes of the instance.
4752
4753     """
4754     env = _BuildInstanceHookEnvByObject(self, self.instance)
4755     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4756     return env, nl, nl
4757
4758   def CheckPrereq(self):
4759     """Check prerequisites.
4760
4761     This checks that the instance is in the cluster and is not running.
4762
4763     """
4764     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4765     assert instance is not None, \
4766       "Cannot retrieve locked instance %s" % self.op.instance_name
4767     _CheckNodeOnline(self, instance.primary_node)
4768
4769     if instance.disk_template == constants.DT_DISKLESS:
4770       raise errors.OpPrereqError("Instance '%s' has no disks" %
4771                                  self.op.instance_name,
4772                                  errors.ECODE_INVAL)
4773     _CheckInstanceDown(self, instance, "cannot reinstall")
4774
4775     if self.op.os_type is not None:
4776       # OS verification
4777       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4778       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4779
4780     self.instance = instance
4781
4782   def Exec(self, feedback_fn):
4783     """Reinstall the instance.
4784
4785     """
4786     inst = self.instance
4787
4788     if self.op.os_type is not None:
4789       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4790       inst.os = self.op.os_type
4791       self.cfg.Update(inst, feedback_fn)
4792
4793     _StartInstanceDisks(self, inst, None)
4794     try:
4795       feedback_fn("Running the instance OS create scripts...")
4796       # FIXME: pass debug option from opcode to backend
4797       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4798                                              self.op.debug_level)
4799       result.Raise("Could not install OS for instance %s on node %s" %
4800                    (inst.name, inst.primary_node))
4801     finally:
4802       _ShutdownInstanceDisks(self, inst)
4803
4804
4805 class LURecreateInstanceDisks(LogicalUnit):
4806   """Recreate an instance's missing disks.
4807
4808   """
4809   HPATH = "instance-recreate-disks"
4810   HTYPE = constants.HTYPE_INSTANCE
4811   _OP_PARAMS = [
4812     _PInstanceName,
4813     ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4814     ]
4815   REQ_BGL = False
4816
4817   def ExpandNames(self):
4818     self._ExpandAndLockInstance()
4819
4820   def BuildHooksEnv(self):
4821     """Build hooks env.
4822
4823     This runs on master, primary and secondary nodes of the instance.
4824
4825     """
4826     env = _BuildInstanceHookEnvByObject(self, self.instance)
4827     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4828     return env, nl, nl
4829
4830   def CheckPrereq(self):
4831     """Check prerequisites.
4832
4833     This checks that the instance is in the cluster and is not running.
4834
4835     """
4836     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4837     assert instance is not None, \
4838       "Cannot retrieve locked instance %s" % self.op.instance_name
4839     _CheckNodeOnline(self, instance.primary_node)
4840
4841     if instance.disk_template == constants.DT_DISKLESS:
4842       raise errors.OpPrereqError("Instance '%s' has no disks" %
4843                                  self.op.instance_name, errors.ECODE_INVAL)
4844     _CheckInstanceDown(self, instance, "cannot recreate disks")
4845
4846     if not self.op.disks:
4847       self.op.disks = range(len(instance.disks))
4848     else:
4849       for idx in self.op.disks:
4850         if idx >= len(instance.disks):
4851           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4852                                      errors.ECODE_INVAL)
4853
4854     self.instance = instance
4855
4856   def Exec(self, feedback_fn):
4857     """Recreate the disks.
4858
4859     """
4860     to_skip = []
4861     for idx, _ in enumerate(self.instance.disks):
4862       if idx not in self.op.disks: # disk idx has not been passed in
4863         to_skip.append(idx)
4864         continue
4865
4866     _CreateDisks(self, self.instance, to_skip=to_skip)
4867
4868
4869 class LURenameInstance(LogicalUnit):
4870   """Rename an instance.
4871
4872   """
4873   HPATH = "instance-rename"
4874   HTYPE = constants.HTYPE_INSTANCE
4875   _OP_PARAMS = [
4876     _PInstanceName,
4877     ("new_name", _NoDefault, _TNonEmptyString),
4878     ("ip_check", False, _TBool),
4879     ("name_check", True, _TBool),
4880     ]
4881
4882   def CheckArguments(self):
4883     """Check arguments.
4884
4885     """
4886     if self.op.ip_check and not self.op.name_check:
4887       # TODO: make the ip check more flexible and not depend on the name check
4888       raise errors.OpPrereqError("Cannot do ip check without a name check",
4889                                  errors.ECODE_INVAL)
4890
4891   def BuildHooksEnv(self):
4892     """Build hooks env.
4893
4894     This runs on master, primary and secondary nodes of the instance.
4895
4896     """
4897     env = _BuildInstanceHookEnvByObject(self, self.instance)
4898     env["INSTANCE_NEW_NAME"] = self.op.new_name
4899     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4900     return env, nl, nl
4901
4902   def CheckPrereq(self):
4903     """Check prerequisites.
4904
4905     This checks that the instance is in the cluster and is not running.
4906
4907     """
4908     self.op.instance_name = _ExpandInstanceName(self.cfg,
4909                                                 self.op.instance_name)
4910     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4911     assert instance is not None
4912     _CheckNodeOnline(self, instance.primary_node)
4913     _CheckInstanceDown(self, instance, "cannot rename")
4914     self.instance = instance
4915
4916     new_name = self.op.new_name
4917     if self.op.name_check:
4918       hostinfo = netutils.HostInfo(netutils.HostInfo.NormalizeName(new_name))
4919       new_name = hostinfo.name
4920       if (self.op.ip_check and
4921           netutils.TcpPing(hostinfo.ip, constants.DEFAULT_NODED_PORT)):
4922         raise errors.OpPrereqError("IP %s of instance %s already in use" %
4923                                    (hostinfo.ip, new_name),
4924                                    errors.ECODE_NOTUNIQUE)
4925
4926     instance_list = self.cfg.GetInstanceList()
4927     if new_name in instance_list:
4928       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4929                                  new_name, errors.ECODE_EXISTS)
4930
4931
4932   def Exec(self, feedback_fn):
4933     """Reinstall the instance.
4934
4935     """
4936     inst = self.instance
4937     old_name = inst.name
4938
4939     if inst.disk_template == constants.DT_FILE:
4940       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4941
4942     self.cfg.RenameInstance(inst.name, self.op.new_name)
4943     # Change the instance lock. This is definitely safe while we hold the BGL
4944     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4945     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4946
4947     # re-read the instance from the configuration after rename
4948     inst = self.cfg.GetInstanceInfo(self.op.new_name)
4949
4950     if inst.disk_template == constants.DT_FILE:
4951       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4952       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4953                                                      old_file_storage_dir,
4954                                                      new_file_storage_dir)
4955       result.Raise("Could not rename on node %s directory '%s' to '%s'"
4956                    " (but the instance has been renamed in Ganeti)" %
4957                    (inst.primary_node, old_file_storage_dir,
4958                     new_file_storage_dir))
4959
4960     _StartInstanceDisks(self, inst, None)
4961     try:
4962       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4963                                                  old_name, self.op.debug_level)
4964       msg = result.fail_msg
4965       if msg:
4966         msg = ("Could not run OS rename script for instance %s on node %s"
4967                " (but the instance has been renamed in Ganeti): %s" %
4968                (inst.name, inst.primary_node, msg))
4969         self.proc.LogWarning(msg)
4970     finally:
4971       _ShutdownInstanceDisks(self, inst)
4972
4973     return inst.name
4974
4975
4976 class LURemoveInstance(LogicalUnit):
4977   """Remove an instance.
4978
4979   """
4980   HPATH = "instance-remove"
4981   HTYPE = constants.HTYPE_INSTANCE
4982   _OP_PARAMS = [
4983     _PInstanceName,
4984     ("ignore_failures", False, _TBool),
4985     _PShutdownTimeout,
4986     ]
4987   REQ_BGL = False
4988
4989   def ExpandNames(self):
4990     self._ExpandAndLockInstance()
4991     self.needed_locks[locking.LEVEL_NODE] = []
4992     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4993
4994   def DeclareLocks(self, level):
4995     if level == locking.LEVEL_NODE:
4996       self._LockInstancesNodes()
4997
4998   def BuildHooksEnv(self):
4999     """Build hooks env.
5000
5001     This runs on master, primary and secondary nodes of the instance.
5002
5003     """
5004     env = _BuildInstanceHookEnvByObject(self, self.instance)
5005     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5006     nl = [self.cfg.GetMasterNode()]
5007     nl_post = list(self.instance.all_nodes) + nl
5008     return env, nl, nl_post
5009
5010   def CheckPrereq(self):
5011     """Check prerequisites.
5012
5013     This checks that the instance is in the cluster.
5014
5015     """
5016     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5017     assert self.instance is not None, \
5018       "Cannot retrieve locked instance %s" % self.op.instance_name
5019
5020   def Exec(self, feedback_fn):
5021     """Remove the instance.
5022
5023     """
5024     instance = self.instance
5025     logging.info("Shutting down instance %s on node %s",
5026                  instance.name, instance.primary_node)
5027
5028     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5029                                              self.op.shutdown_timeout)
5030     msg = result.fail_msg
5031     if msg:
5032       if self.op.ignore_failures:
5033         feedback_fn("Warning: can't shutdown instance: %s" % msg)
5034       else:
5035         raise errors.OpExecError("Could not shutdown instance %s on"
5036                                  " node %s: %s" %
5037                                  (instance.name, instance.primary_node, msg))
5038
5039     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5040
5041
5042 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5043   """Utility function to remove an instance.
5044
5045   """
5046   logging.info("Removing block devices for instance %s", instance.name)
5047
5048   if not _RemoveDisks(lu, instance):
5049     if not ignore_failures:
5050       raise errors.OpExecError("Can't remove instance's disks")
5051     feedback_fn("Warning: can't remove instance's disks")
5052
5053   logging.info("Removing instance %s out of cluster config", instance.name)
5054
5055   lu.cfg.RemoveInstance(instance.name)
5056
5057   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5058     "Instance lock removal conflict"
5059
5060   # Remove lock for the instance
5061   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5062
5063
5064 class LUQueryInstances(NoHooksLU):
5065   """Logical unit for querying instances.
5066
5067   """
5068   # pylint: disable-msg=W0142
5069   _OP_PARAMS = [
5070     ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5071     ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5072     ("use_locking", False, _TBool),
5073     ]
5074   REQ_BGL = False
5075   _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5076                     "serial_no", "ctime", "mtime", "uuid"]
5077   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5078                                     "admin_state",
5079                                     "disk_template", "ip", "mac", "bridge",
5080                                     "nic_mode", "nic_link",
5081                                     "sda_size", "sdb_size", "vcpus", "tags",
5082                                     "network_port", "beparams",
5083                                     r"(disk)\.(size)/([0-9]+)",
5084                                     r"(disk)\.(sizes)", "disk_usage",
5085                                     r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5086                                     r"(nic)\.(bridge)/([0-9]+)",
5087                                     r"(nic)\.(macs|ips|modes|links|bridges)",
5088                                     r"(disk|nic)\.(count)",
5089                                     "hvparams",
5090                                     ] + _SIMPLE_FIELDS +
5091                                   ["hv/%s" % name
5092                                    for name in constants.HVS_PARAMETERS
5093                                    if name not in constants.HVC_GLOBALS] +
5094                                   ["be/%s" % name
5095                                    for name in constants.BES_PARAMETERS])
5096   _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5097                                    "oper_ram",
5098                                    "oper_vcpus",
5099                                    "status")
5100
5101
5102   def CheckArguments(self):
5103     _CheckOutputFields(static=self._FIELDS_STATIC,
5104                        dynamic=self._FIELDS_DYNAMIC,
5105                        selected=self.op.output_fields)
5106
5107   def ExpandNames(self):
5108     self.needed_locks = {}
5109     self.share_locks[locking.LEVEL_INSTANCE] = 1
5110     self.share_locks[locking.LEVEL_NODE] = 1
5111
5112     if self.op.names:
5113       self.wanted = _GetWantedInstances(self, self.op.names)
5114     else:
5115       self.wanted = locking.ALL_SET
5116
5117     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5118     self.do_locking = self.do_node_query and self.op.use_locking
5119     if self.do_locking:
5120       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5121       self.needed_locks[locking.LEVEL_NODE] = []
5122       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5123
5124   def DeclareLocks(self, level):
5125     if level == locking.LEVEL_NODE and self.do_locking:
5126       self._LockInstancesNodes()
5127
5128   def Exec(self, feedback_fn):
5129     """Computes the list of nodes and their attributes.
5130
5131     """
5132     # pylint: disable-msg=R0912
5133     # way too many branches here
5134     all_info = self.cfg.GetAllInstancesInfo()
5135     if self.wanted == locking.ALL_SET:
5136       # caller didn't specify instance names, so ordering is not important
5137       if self.do_locking:
5138         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5139       else:
5140         instance_names = all_info.keys()
5141       instance_names = utils.NiceSort(instance_names)
5142     else:
5143       # caller did specify names, so we must keep the ordering
5144       if self.do_locking:
5145         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5146       else:
5147         tgt_set = all_info.keys()
5148       missing = set(self.wanted).difference(tgt_set)
5149       if missing:
5150         raise errors.OpExecError("Some instances were removed before"
5151                                  " retrieving their data: %s" % missing)
5152       instance_names = self.wanted
5153
5154     instance_list = [all_info[iname] for iname in instance_names]
5155
5156     # begin data gathering
5157
5158     nodes = frozenset([inst.primary_node for inst in instance_list])
5159     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5160
5161     bad_nodes = []
5162     off_nodes = []
5163     if self.do_node_query:
5164       live_data = {}
5165       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5166       for name in nodes:
5167         result = node_data[name]
5168         if result.offline:
5169           # offline nodes will be in both lists
5170           off_nodes.append(name)
5171         if result.fail_msg:
5172           bad_nodes.append(name)
5173         else:
5174           if result.payload:
5175             live_data.update(result.payload)
5176           # else no instance is alive
5177     else:
5178       live_data = dict([(name, {}) for name in instance_names])
5179
5180     # end data gathering
5181
5182     HVPREFIX = "hv/"
5183     BEPREFIX = "be/"
5184     output = []
5185     cluster = self.cfg.GetClusterInfo()
5186     for instance in instance_list:
5187       iout = []
5188       i_hv = cluster.FillHV(instance, skip_globals=True)
5189       i_be = cluster.FillBE(instance)
5190       i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5191       for field in self.op.output_fields:
5192         st_match = self._FIELDS_STATIC.Matches(field)
5193         if field in self._SIMPLE_FIELDS:
5194           val = getattr(instance, field)
5195         elif field == "pnode":
5196           val = instance.primary_node
5197         elif field == "snodes":
5198           val = list(instance.secondary_nodes)
5199         elif field == "admin_state":
5200           val = instance.admin_up
5201         elif field == "oper_state":
5202           if instance.primary_node in bad_nodes:
5203             val = None
5204           else:
5205             val = bool(live_data.get(instance.name))
5206         elif field == "status":
5207           if instance.primary_node in off_nodes:
5208             val = "ERROR_nodeoffline"
5209           elif instance.primary_node in bad_nodes:
5210             val = "ERROR_nodedown"
5211           else:
5212             running = bool(live_data.get(instance.name))
5213             if running:
5214               if instance.admin_up:
5215                 val = "running"
5216               else:
5217                 val = "ERROR_up"
5218             else:
5219               if instance.admin_up:
5220                 val = "ERROR_down"
5221               else:
5222                 val = "ADMIN_down"
5223         elif field == "oper_ram":
5224           if instance.primary_node in bad_nodes:
5225             val = None
5226           elif instance.name in live_data:
5227             val = live_data[instance.name].get("memory", "?")
5228           else:
5229             val = "-"
5230         elif field == "oper_vcpus":
5231           if instance.primary_node in bad_nodes:
5232             val = None
5233           elif instance.name in live_data:
5234             val = live_data[instance.name].get("vcpus", "?")
5235           else:
5236             val = "-"
5237         elif field == "vcpus":
5238           val = i_be[constants.BE_VCPUS]
5239         elif field == "disk_template":
5240           val = instance.disk_template
5241         elif field == "ip":
5242           if instance.nics:
5243             val = instance.nics[0].ip
5244           else:
5245             val = None
5246         elif field == "nic_mode":
5247           if instance.nics:
5248             val = i_nicp[0][constants.NIC_MODE]
5249           else:
5250             val = None
5251         elif field == "nic_link":
5252           if instance.nics:
5253             val = i_nicp[0][constants.NIC_LINK]
5254           else:
5255             val = None
5256         elif field == "bridge":
5257           if (instance.nics and
5258               i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5259             val = i_nicp[0][constants.NIC_LINK]
5260           else:
5261             val = None
5262         elif field == "mac":
5263           if instance.nics:
5264             val = instance.nics[0].mac
5265           else:
5266             val = None
5267         elif field == "sda_size" or field == "sdb_size":
5268           idx = ord(field[2]) - ord('a')
5269           try:
5270             val = instance.FindDisk(idx).size
5271           except errors.OpPrereqError:
5272             val = None
5273         elif field == "disk_usage": # total disk usage per node
5274           disk_sizes = [{'size': disk.size} for disk in instance.disks]
5275           val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5276         elif field == "tags":
5277           val = list(instance.GetTags())
5278         elif field == "hvparams":
5279           val = i_hv
5280         elif (field.startswith(HVPREFIX) and
5281               field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5282               field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5283           val = i_hv.get(field[len(HVPREFIX):], None)
5284         elif field == "beparams":
5285           val = i_be
5286         elif (field.startswith(BEPREFIX) and
5287               field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5288           val = i_be.get(field[len(BEPREFIX):], None)
5289         elif st_match and st_match.groups():
5290           # matches a variable list
5291           st_groups = st_match.groups()
5292           if st_groups and st_groups[0] == "disk":
5293             if st_groups[1] == "count":
5294               val = len(instance.disks)
5295             elif st_groups[1] == "sizes":
5296               val = [disk.size for disk in instance.disks]
5297             elif st_groups[1] == "size":
5298               try:
5299                 val = instance.FindDisk(st_groups[2]).size
5300               except errors.OpPrereqError:
5301                 val = None
5302             else:
5303               assert False, "Unhandled disk parameter"
5304           elif st_groups[0] == "nic":
5305             if st_groups[1] == "count":
5306               val = len(instance.nics)
5307             elif st_groups[1] == "macs":
5308               val = [nic.mac for nic in instance.nics]
5309             elif st_groups[1] == "ips":
5310               val = [nic.ip for nic in instance.nics]
5311             elif st_groups[1] == "modes":
5312               val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5313             elif st_groups[1] == "links":
5314               val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5315             elif st_groups[1] == "bridges":
5316               val = []
5317               for nicp in i_nicp:
5318                 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5319                   val.append(nicp[constants.NIC_LINK])
5320                 else:
5321                   val.append(None)
5322             else:
5323               # index-based item
5324               nic_idx = int(st_groups[2])
5325               if nic_idx >= len(instance.nics):
5326                 val = None
5327               else:
5328                 if st_groups[1] == "mac":
5329                   val = instance.nics[nic_idx].mac
5330                 elif st_groups[1] == "ip":
5331                   val = instance.nics[nic_idx].ip
5332                 elif st_groups[1] == "mode":
5333                   val = i_nicp[nic_idx][constants.NIC_MODE]
5334                 elif st_groups[1] == "link":
5335                   val = i_nicp[nic_idx][constants.NIC_LINK]
5336                 elif st_groups[1] == "bridge":
5337                   nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5338                   if nic_mode == constants.NIC_MODE_BRIDGED:
5339                     val = i_nicp[nic_idx][constants.NIC_LINK]
5340                   else:
5341                     val = None
5342                 else:
5343                   assert False, "Unhandled NIC parameter"
5344           else:
5345             assert False, ("Declared but unhandled variable parameter '%s'" %
5346                            field)
5347         else:
5348           assert False, "Declared but unhandled parameter '%s'" % field
5349         iout.append(val)
5350       output.append(iout)
5351
5352     return output
5353
5354
5355 class LUFailoverInstance(LogicalUnit):
5356   """Failover an instance.
5357
5358   """
5359   HPATH = "instance-failover"
5360   HTYPE = constants.HTYPE_INSTANCE
5361   _OP_PARAMS = [
5362     _PInstanceName,
5363     ("ignore_consistency", False, _TBool),
5364     _PShutdownTimeout,
5365     ]
5366   REQ_BGL = False
5367
5368   def ExpandNames(self):
5369     self._ExpandAndLockInstance()
5370     self.needed_locks[locking.LEVEL_NODE] = []
5371     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5372
5373   def DeclareLocks(self, level):
5374     if level == locking.LEVEL_NODE:
5375       self._LockInstancesNodes()
5376
5377   def BuildHooksEnv(self):
5378     """Build hooks env.
5379
5380     This runs on master, primary and secondary nodes of the instance.
5381
5382     """
5383     instance = self.instance
5384     source_node = instance.primary_node
5385     target_node = instance.secondary_nodes[0]
5386     env = {
5387       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5388       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5389       "OLD_PRIMARY": source_node,
5390       "OLD_SECONDARY": target_node,
5391       "NEW_PRIMARY": target_node,
5392       "NEW_SECONDARY": source_node,
5393       }
5394     env.update(_BuildInstanceHookEnvByObject(self, instance))
5395     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5396     nl_post = list(nl)
5397     nl_post.append(source_node)
5398     return env, nl, nl_post
5399
5400   def CheckPrereq(self):
5401     """Check prerequisites.
5402
5403     This checks that the instance is in the cluster.
5404
5405     """
5406     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5407     assert self.instance is not None, \
5408       "Cannot retrieve locked instance %s" % self.op.instance_name
5409
5410     bep = self.cfg.GetClusterInfo().FillBE(instance)
5411     if instance.disk_template not in constants.DTS_NET_MIRROR:
5412       raise errors.OpPrereqError("Instance's disk layout is not"
5413                                  " network mirrored, cannot failover.",
5414                                  errors.ECODE_STATE)
5415
5416     secondary_nodes = instance.secondary_nodes
5417     if not secondary_nodes:
5418       raise errors.ProgrammerError("no secondary node but using "
5419                                    "a mirrored disk template")
5420
5421     target_node = secondary_nodes[0]
5422     _CheckNodeOnline(self, target_node)
5423     _CheckNodeNotDrained(self, target_node)
5424     if instance.admin_up:
5425       # check memory requirements on the secondary node
5426       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5427                            instance.name, bep[constants.BE_MEMORY],
5428                            instance.hypervisor)
5429     else:
5430       self.LogInfo("Not checking memory on the secondary node as"
5431                    " instance will not be started")
5432
5433     # check bridge existance
5434     _CheckInstanceBridgesExist(self, instance, node=target_node)
5435
5436   def Exec(self, feedback_fn):
5437     """Failover an instance.
5438
5439     The failover is done by shutting it down on its present node and
5440     starting it on the secondary.
5441
5442     """
5443     instance = self.instance
5444
5445     source_node = instance.primary_node
5446     target_node = instance.secondary_nodes[0]
5447
5448     if instance.admin_up:
5449       feedback_fn("* checking disk consistency between source and target")
5450       for dev in instance.disks:
5451         # for drbd, these are drbd over lvm
5452         if not _CheckDiskConsistency(self, dev, target_node, False):
5453           if not self.op.ignore_consistency:
5454             raise errors.OpExecError("Disk %s is degraded on target node,"
5455                                      " aborting failover." % dev.iv_name)
5456     else:
5457       feedback_fn("* not checking disk consistency as instance is not running")
5458
5459     feedback_fn("* shutting down instance on source node")
5460     logging.info("Shutting down instance %s on node %s",
5461                  instance.name, source_node)
5462
5463     result = self.rpc.call_instance_shutdown(source_node, instance,
5464                                              self.op.shutdown_timeout)
5465     msg = result.fail_msg
5466     if msg:
5467       if self.op.ignore_consistency:
5468         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5469                              " Proceeding anyway. Please make sure node"
5470                              " %s is down. Error details: %s",
5471                              instance.name, source_node, source_node, msg)
5472       else:
5473         raise errors.OpExecError("Could not shutdown instance %s on"
5474                                  " node %s: %s" %
5475                                  (instance.name, source_node, msg))
5476
5477     feedback_fn("* deactivating the instance's disks on source node")
5478     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5479       raise errors.OpExecError("Can't shut down the instance's disks.")
5480
5481     instance.primary_node = target_node
5482     # distribute new instance config to the other nodes
5483     self.cfg.Update(instance, feedback_fn)
5484
5485     # Only start the instance if it's marked as up
5486     if instance.admin_up:
5487       feedback_fn("* activating the instance's disks on target node")
5488       logging.info("Starting instance %s on node %s",
5489                    instance.name, target_node)
5490
5491       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5492                                            ignore_secondaries=True)
5493       if not disks_ok:
5494         _ShutdownInstanceDisks(self, instance)
5495         raise errors.OpExecError("Can't activate the instance's disks")
5496
5497       feedback_fn("* starting the instance on the target node")
5498       result = self.rpc.call_instance_start(target_node, instance, None, None)
5499       msg = result.fail_msg
5500       if msg:
5501         _ShutdownInstanceDisks(self, instance)
5502         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5503                                  (instance.name, target_node, msg))
5504
5505
5506 class LUMigrateInstance(LogicalUnit):
5507   """Migrate an instance.
5508
5509   This is migration without shutting down, compared to the failover,
5510   which is done with shutdown.
5511
5512   """
5513   HPATH = "instance-migrate"
5514   HTYPE = constants.HTYPE_INSTANCE
5515   _OP_PARAMS = [
5516     _PInstanceName,
5517     _PMigrationMode,
5518     _PMigrationLive,
5519     ("cleanup", False, _TBool),
5520     ]
5521
5522   REQ_BGL = False
5523
5524   def ExpandNames(self):
5525     self._ExpandAndLockInstance()
5526
5527     self.needed_locks[locking.LEVEL_NODE] = []
5528     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5529
5530     self._migrater = TLMigrateInstance(self, self.op.instance_name,
5531                                        self.op.cleanup)
5532     self.tasklets = [self._migrater]
5533
5534   def DeclareLocks(self, level):
5535     if level == locking.LEVEL_NODE:
5536       self._LockInstancesNodes()
5537
5538   def BuildHooksEnv(self):
5539     """Build hooks env.
5540
5541     This runs on master, primary and secondary nodes of the instance.
5542
5543     """
5544     instance = self._migrater.instance
5545     source_node = instance.primary_node
5546     target_node = instance.secondary_nodes[0]
5547     env = _BuildInstanceHookEnvByObject(self, instance)
5548     env["MIGRATE_LIVE"] = self._migrater.live
5549     env["MIGRATE_CLEANUP"] = self.op.cleanup
5550     env.update({
5551         "OLD_PRIMARY": source_node,
5552         "OLD_SECONDARY": target_node,
5553         "NEW_PRIMARY": target_node,
5554         "NEW_SECONDARY": source_node,
5555         })
5556     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5557     nl_post = list(nl)
5558     nl_post.append(source_node)
5559     return env, nl, nl_post
5560
5561
5562 class LUMoveInstance(LogicalUnit):
5563   """Move an instance by data-copying.
5564
5565   """
5566   HPATH = "instance-move"
5567   HTYPE = constants.HTYPE_INSTANCE
5568   _OP_PARAMS = [
5569     _PInstanceName,
5570     ("target_node", _NoDefault, _TNonEmptyString),
5571     _PShutdownTimeout,
5572     ]
5573   REQ_BGL = False
5574
5575   def ExpandNames(self):
5576     self._ExpandAndLockInstance()
5577     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5578     self.op.target_node = target_node
5579     self.needed_locks[locking.LEVEL_NODE] = [target_node]
5580     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5581
5582   def DeclareLocks(self, level):
5583     if level == locking.LEVEL_NODE:
5584       self._LockInstancesNodes(primary_only=True)
5585
5586   def BuildHooksEnv(self):
5587     """Build hooks env.
5588
5589     This runs on master, primary and secondary nodes of the instance.
5590
5591     """
5592     env = {
5593       "TARGET_NODE": self.op.target_node,
5594       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5595       }
5596     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5597     nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5598                                        self.op.target_node]
5599     return env, nl, nl
5600
5601   def CheckPrereq(self):
5602     """Check prerequisites.
5603
5604     This checks that the instance is in the cluster.
5605
5606     """
5607     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5608     assert self.instance is not None, \
5609       "Cannot retrieve locked instance %s" % self.op.instance_name
5610
5611     node = self.cfg.GetNodeInfo(self.op.target_node)
5612     assert node is not None, \
5613       "Cannot retrieve locked node %s" % self.op.target_node
5614
5615     self.target_node = target_node = node.name
5616
5617     if target_node == instance.primary_node:
5618       raise errors.OpPrereqError("Instance %s is already on the node %s" %
5619                                  (instance.name, target_node),
5620                                  errors.ECODE_STATE)
5621
5622     bep = self.cfg.GetClusterInfo().FillBE(instance)
5623
5624     for idx, dsk in enumerate(instance.disks):
5625       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5626         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5627                                    " cannot copy" % idx, errors.ECODE_STATE)
5628
5629     _CheckNodeOnline(self, target_node)
5630     _CheckNodeNotDrained(self, target_node)
5631
5632     if instance.admin_up:
5633       # check memory requirements on the secondary node
5634       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5635                            instance.name, bep[constants.BE_MEMORY],
5636                            instance.hypervisor)
5637     else:
5638       self.LogInfo("Not checking memory on the secondary node as"
5639                    " instance will not be started")
5640
5641     # check bridge existance
5642     _CheckInstanceBridgesExist(self, instance, node=target_node)
5643
5644   def Exec(self, feedback_fn):
5645     """Move an instance.
5646
5647     The move is done by shutting it down on its present node, copying
5648     the data over (slow) and starting it on the new node.
5649
5650     """
5651     instance = self.instance
5652
5653     source_node = instance.primary_node
5654     target_node = self.target_node
5655
5656     self.LogInfo("Shutting down instance %s on source node %s",
5657                  instance.name, source_node)
5658
5659     result = self.rpc.call_instance_shutdown(source_node, instance,
5660                                              self.op.shutdown_timeout)
5661     msg = result.fail_msg
5662     if msg:
5663       if self.op.ignore_consistency:
5664         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5665                              " Proceeding anyway. Please make sure node"
5666                              " %s is down. Error details: %s",
5667                              instance.name, source_node, source_node, msg)
5668       else:
5669         raise errors.OpExecError("Could not shutdown instance %s on"
5670                                  " node %s: %s" %
5671                                  (instance.name, source_node, msg))
5672
5673     # create the target disks
5674     try:
5675       _CreateDisks(self, instance, target_node=target_node)
5676     except errors.OpExecError:
5677       self.LogWarning("Device creation failed, reverting...")
5678       try:
5679         _RemoveDisks(self, instance, target_node=target_node)
5680       finally:
5681         self.cfg.ReleaseDRBDMinors(instance.name)
5682         raise
5683
5684     cluster_name = self.cfg.GetClusterInfo().cluster_name
5685
5686     errs = []
5687     # activate, get path, copy the data over
5688     for idx, disk in enumerate(instance.disks):
5689       self.LogInfo("Copying data for disk %d", idx)
5690       result = self.rpc.call_blockdev_assemble(target_node, disk,
5691                                                instance.name, True)
5692       if result.fail_msg:
5693         self.LogWarning("Can't assemble newly created disk %d: %s",
5694                         idx, result.fail_msg)
5695         errs.append(result.fail_msg)
5696         break
5697       dev_path = result.payload
5698       result = self.rpc.call_blockdev_export(source_node, disk,
5699                                              target_node, dev_path,
5700                                              cluster_name)
5701       if result.fail_msg:
5702         self.LogWarning("Can't copy data over for disk %d: %s",
5703                         idx, result.fail_msg)
5704         errs.append(result.fail_msg)
5705         break
5706
5707     if errs:
5708       self.LogWarning("Some disks failed to copy, aborting")
5709       try:
5710         _RemoveDisks(self, instance, target_node=target_node)
5711       finally:
5712         self.cfg.ReleaseDRBDMinors(instance.name)
5713         raise errors.OpExecError("Errors during disk copy: %s" %
5714                                  (",".join(errs),))
5715
5716     instance.primary_node = target_node
5717     self.cfg.Update(instance, feedback_fn)
5718
5719     self.LogInfo("Removing the disks on the original node")
5720     _RemoveDisks(self, instance, target_node=source_node)
5721
5722     # Only start the instance if it's marked as up
5723     if instance.admin_up:
5724       self.LogInfo("Starting instance %s on node %s",
5725                    instance.name, target_node)
5726
5727       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5728                                            ignore_secondaries=True)
5729       if not disks_ok:
5730         _ShutdownInstanceDisks(self, instance)
5731         raise errors.OpExecError("Can't activate the instance's disks")
5732
5733       result = self.rpc.call_instance_start(target_node, instance, None, None)
5734       msg = result.fail_msg
5735       if msg:
5736         _ShutdownInstanceDisks(self, instance)
5737         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5738                                  (instance.name, target_node, msg))
5739
5740
5741 class LUMigrateNode(LogicalUnit):
5742   """Migrate all instances from a node.
5743
5744   """
5745   HPATH = "node-migrate"
5746   HTYPE = constants.HTYPE_NODE
5747   _OP_PARAMS = [
5748     _PNodeName,
5749     _PMigrationMode,
5750     _PMigrationLive,
5751     ]
5752   REQ_BGL = False
5753
5754   def ExpandNames(self):
5755     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5756
5757     self.needed_locks = {
5758       locking.LEVEL_NODE: [self.op.node_name],
5759       }
5760
5761     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5762
5763     # Create tasklets for migrating instances for all instances on this node
5764     names = []
5765     tasklets = []
5766
5767     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5768       logging.debug("Migrating instance %s", inst.name)
5769       names.append(inst.name)
5770
5771       tasklets.append(TLMigrateInstance(self, inst.name, False))
5772
5773     self.tasklets = tasklets
5774
5775     # Declare instance locks
5776     self.needed_locks[locking.LEVEL_INSTANCE] = names
5777
5778   def DeclareLocks(self, level):
5779     if level == locking.LEVEL_NODE:
5780       self._LockInstancesNodes()
5781
5782   def BuildHooksEnv(self):
5783     """Build hooks env.
5784
5785     This runs on the master, the primary and all the secondaries.
5786
5787     """
5788     env = {
5789       "NODE_NAME": self.op.node_name,
5790       }
5791
5792     nl = [self.cfg.GetMasterNode()]
5793
5794     return (env, nl, nl)
5795
5796
5797 class TLMigrateInstance(Tasklet):
5798   """Tasklet class for instance migration.
5799
5800   @type live: boolean
5801   @ivar live: whether the migration will be done live or non-live;
5802       this variable is initalized only after CheckPrereq has run
5803
5804   """
5805   def __init__(self, lu, instance_name, cleanup):
5806     """Initializes this class.
5807
5808     """
5809     Tasklet.__init__(self, lu)
5810
5811     # Parameters
5812     self.instance_name = instance_name
5813     self.cleanup = cleanup
5814     self.live = False # will be overridden later
5815
5816   def CheckPrereq(self):
5817     """Check prerequisites.
5818
5819     This checks that the instance is in the cluster.
5820
5821     """
5822     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5823     instance = self.cfg.GetInstanceInfo(instance_name)
5824     assert instance is not None
5825
5826     if instance.disk_template != constants.DT_DRBD8:
5827       raise errors.OpPrereqError("Instance's disk layout is not"
5828                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
5829
5830     secondary_nodes = instance.secondary_nodes
5831     if not secondary_nodes:
5832       raise errors.ConfigurationError("No secondary node but using"
5833                                       " drbd8 disk template")
5834
5835     i_be = self.cfg.GetClusterInfo().FillBE(instance)
5836
5837     target_node = secondary_nodes[0]
5838     # check memory requirements on the secondary node
5839     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5840                          instance.name, i_be[constants.BE_MEMORY],
5841                          instance.hypervisor)
5842
5843     # check bridge existance
5844     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5845
5846     if not self.cleanup:
5847       _CheckNodeNotDrained(self.lu, target_node)
5848       result = self.rpc.call_instance_migratable(instance.primary_node,
5849                                                  instance)
5850       result.Raise("Can't migrate, please use failover",
5851                    prereq=True, ecode=errors.ECODE_STATE)
5852
5853     self.instance = instance
5854
5855     if self.lu.op.live is not None and self.lu.op.mode is not None:
5856       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5857                                  " parameters are accepted",
5858                                  errors.ECODE_INVAL)
5859     if self.lu.op.live is not None:
5860       if self.lu.op.live:
5861         self.lu.op.mode = constants.HT_MIGRATION_LIVE
5862       else:
5863         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5864       # reset the 'live' parameter to None so that repeated
5865       # invocations of CheckPrereq do not raise an exception
5866       self.lu.op.live = None
5867     elif self.lu.op.mode is None:
5868       # read the default value from the hypervisor
5869       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5870       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5871
5872     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
5873
5874   def _WaitUntilSync(self):
5875     """Poll with custom rpc for disk sync.
5876
5877     This uses our own step-based rpc call.
5878
5879     """
5880     self.feedback_fn("* wait until resync is done")
5881     all_done = False
5882     while not all_done:
5883       all_done = True
5884       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5885                                             self.nodes_ip,
5886                                             self.instance.disks)
5887       min_percent = 100
5888       for node, nres in result.items():
5889         nres.Raise("Cannot resync disks on node %s" % node)
5890         node_done, node_percent = nres.payload
5891         all_done = all_done and node_done
5892         if node_percent is not None:
5893           min_percent = min(min_percent, node_percent)
5894       if not all_done:
5895         if min_percent < 100:
5896           self.feedback_fn("   - progress: %.1f%%" % min_percent)
5897         time.sleep(2)
5898
5899   def _EnsureSecondary(self, node):
5900     """Demote a node to secondary.
5901
5902     """
5903     self.feedback_fn("* switching node %s to secondary mode" % node)
5904
5905     for dev in self.instance.disks:
5906       self.cfg.SetDiskID(dev, node)
5907
5908     result = self.rpc.call_blockdev_close(node, self.instance.name,
5909                                           self.instance.disks)
5910     result.Raise("Cannot change disk to secondary on node %s" % node)
5911
5912   def _GoStandalone(self):
5913     """Disconnect from the network.
5914
5915     """
5916     self.feedback_fn("* changing into standalone mode")
5917     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5918                                                self.instance.disks)
5919     for node, nres in result.items():
5920       nres.Raise("Cannot disconnect disks node %s" % node)
5921
5922   def _GoReconnect(self, multimaster):
5923     """Reconnect to the network.
5924
5925     """
5926     if multimaster:
5927       msg = "dual-master"
5928     else:
5929       msg = "single-master"
5930     self.feedback_fn("* changing disks into %s mode" % msg)
5931     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5932                                            self.instance.disks,
5933                                            self.instance.name, multimaster)
5934     for node, nres in result.items():
5935       nres.Raise("Cannot change disks config on node %s" % node)
5936
5937   def _ExecCleanup(self):
5938     """Try to cleanup after a failed migration.
5939
5940     The cleanup is done by:
5941       - check that the instance is running only on one node
5942         (and update the config if needed)
5943       - change disks on its secondary node to secondary
5944       - wait until disks are fully synchronized
5945       - disconnect from the network
5946       - change disks into single-master mode
5947       - wait again until disks are fully synchronized
5948
5949     """
5950     instance = self.instance
5951     target_node = self.target_node
5952     source_node = self.source_node
5953
5954     # check running on only one node
5955     self.feedback_fn("* checking where the instance actually runs"
5956                      " (if this hangs, the hypervisor might be in"
5957                      " a bad state)")
5958     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5959     for node, result in ins_l.items():
5960       result.Raise("Can't contact node %s" % node)
5961
5962     runningon_source = instance.name in ins_l[source_node].payload
5963     runningon_target = instance.name in ins_l[target_node].payload
5964
5965     if runningon_source and runningon_target:
5966       raise errors.OpExecError("Instance seems to be running on two nodes,"
5967                                " or the hypervisor is confused. You will have"
5968                                " to ensure manually that it runs only on one"
5969                                " and restart this operation.")
5970
5971     if not (runningon_source or runningon_target):
5972       raise errors.OpExecError("Instance does not seem to be running at all."
5973                                " In this case, it's safer to repair by"
5974                                " running 'gnt-instance stop' to ensure disk"
5975                                " shutdown, and then restarting it.")
5976
5977     if runningon_target:
5978       # the migration has actually succeeded, we need to update the config
5979       self.feedback_fn("* instance running on secondary node (%s),"
5980                        " updating config" % target_node)
5981       instance.primary_node = target_node
5982       self.cfg.Update(instance, self.feedback_fn)
5983       demoted_node = source_node
5984     else:
5985       self.feedback_fn("* instance confirmed to be running on its"
5986                        " primary node (%s)" % source_node)
5987       demoted_node = target_node
5988
5989     self._EnsureSecondary(demoted_node)
5990     try:
5991       self._WaitUntilSync()
5992     except errors.OpExecError:
5993       # we ignore here errors, since if the device is standalone, it
5994       # won't be able to sync
5995       pass
5996     self._GoStandalone()
5997     self._GoReconnect(False)
5998     self._WaitUntilSync()
5999
6000     self.feedback_fn("* done")
6001
6002   def _RevertDiskStatus(self):
6003     """Try to revert the disk status after a failed migration.
6004
6005     """
6006     target_node = self.target_node
6007     try:
6008       self._EnsureSecondary(target_node)
6009       self._GoStandalone()
6010       self._GoReconnect(False)
6011       self._WaitUntilSync()
6012     except errors.OpExecError, err:
6013       self.lu.LogWarning("Migration failed and I can't reconnect the"
6014                          " drives: error '%s'\n"
6015                          "Please look and recover the instance status" %
6016                          str(err))
6017
6018   def _AbortMigration(self):
6019     """Call the hypervisor code to abort a started migration.
6020
6021     """
6022     instance = self.instance
6023     target_node = self.target_node
6024     migration_info = self.migration_info
6025
6026     abort_result = self.rpc.call_finalize_migration(target_node,
6027                                                     instance,
6028                                                     migration_info,
6029                                                     False)
6030     abort_msg = abort_result.fail_msg
6031     if abort_msg:
6032       logging.error("Aborting migration failed on target node %s: %s",
6033                     target_node, abort_msg)
6034       # Don't raise an exception here, as we stil have to try to revert the
6035       # disk status, even if this step failed.
6036
6037   def _ExecMigration(self):
6038     """Migrate an instance.
6039
6040     The migrate is done by:
6041       - change the disks into dual-master mode
6042       - wait until disks are fully synchronized again
6043       - migrate the instance
6044       - change disks on the new secondary node (the old primary) to secondary
6045       - wait until disks are fully synchronized
6046       - change disks into single-master mode
6047
6048     """
6049     instance = self.instance
6050     target_node = self.target_node
6051     source_node = self.source_node
6052
6053     self.feedback_fn("* checking disk consistency between source and target")
6054     for dev in instance.disks:
6055       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6056         raise errors.OpExecError("Disk %s is degraded or not fully"
6057                                  " synchronized on target node,"
6058                                  " aborting migrate." % dev.iv_name)
6059
6060     # First get the migration information from the remote node
6061     result = self.rpc.call_migration_info(source_node, instance)
6062     msg = result.fail_msg
6063     if msg:
6064       log_err = ("Failed fetching source migration information from %s: %s" %
6065                  (source_node, msg))
6066       logging.error(log_err)
6067       raise errors.OpExecError(log_err)
6068
6069     self.migration_info = migration_info = result.payload
6070
6071     # Then switch the disks to master/master mode
6072     self._EnsureSecondary(target_node)
6073     self._GoStandalone()
6074     self._GoReconnect(True)
6075     self._WaitUntilSync()
6076
6077     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6078     result = self.rpc.call_accept_instance(target_node,
6079                                            instance,
6080                                            migration_info,
6081                                            self.nodes_ip[target_node])
6082
6083     msg = result.fail_msg
6084     if msg:
6085       logging.error("Instance pre-migration failed, trying to revert"
6086                     " disk status: %s", msg)
6087       self.feedback_fn("Pre-migration failed, aborting")
6088       self._AbortMigration()
6089       self._RevertDiskStatus()
6090       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6091                                (instance.name, msg))
6092
6093     self.feedback_fn("* migrating instance to %s" % target_node)
6094     time.sleep(10)
6095     result = self.rpc.call_instance_migrate(source_node, instance,
6096                                             self.nodes_ip[target_node],
6097                                             self.live)
6098     msg = result.fail_msg
6099     if msg:
6100       logging.error("Instance migration failed, trying to revert"
6101                     " disk status: %s", msg)
6102       self.feedback_fn("Migration failed, aborting")
6103       self._AbortMigration()
6104       self._RevertDiskStatus()
6105       raise errors.OpExecError("Could not migrate instance %s: %s" %
6106                                (instance.name, msg))
6107     time.sleep(10)
6108
6109     instance.primary_node = target_node
6110     # distribute new instance config to the other nodes
6111     self.cfg.Update(instance, self.feedback_fn)
6112
6113     result = self.rpc.call_finalize_migration(target_node,
6114                                               instance,
6115                                               migration_info,
6116                                               True)
6117     msg = result.fail_msg
6118     if msg:
6119       logging.error("Instance migration succeeded, but finalization failed:"
6120                     " %s", msg)
6121       raise errors.OpExecError("Could not finalize instance migration: %s" %
6122                                msg)
6123
6124     self._EnsureSecondary(source_node)
6125     self._WaitUntilSync()
6126     self._GoStandalone()
6127     self._GoReconnect(False)
6128     self._WaitUntilSync()
6129
6130     self.feedback_fn("* done")
6131
6132   def Exec(self, feedback_fn):
6133     """Perform the migration.
6134
6135     """
6136     feedback_fn("Migrating instance %s" % self.instance.name)
6137
6138     self.feedback_fn = feedback_fn
6139
6140     self.source_node = self.instance.primary_node
6141     self.target_node = self.instance.secondary_nodes[0]
6142     self.all_nodes = [self.source_node, self.target_node]
6143     self.nodes_ip = {
6144       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6145       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6146       }
6147
6148     if self.cleanup:
6149       return self._ExecCleanup()
6150     else:
6151       return self._ExecMigration()
6152
6153
6154 def _CreateBlockDev(lu, node, instance, device, force_create,
6155                     info, force_open):
6156   """Create a tree of block devices on a given node.
6157
6158   If this device type has to be created on secondaries, create it and
6159   all its children.
6160
6161   If not, just recurse to children keeping the same 'force' value.
6162
6163   @param lu: the lu on whose behalf we execute
6164   @param node: the node on which to create the device
6165   @type instance: L{objects.Instance}
6166   @param instance: the instance which owns the device
6167   @type device: L{objects.Disk}
6168   @param device: the device to create
6169   @type force_create: boolean
6170   @param force_create: whether to force creation of this device; this
6171       will be change to True whenever we find a device which has
6172       CreateOnSecondary() attribute
6173   @param info: the extra 'metadata' we should attach to the device
6174       (this will be represented as a LVM tag)
6175   @type force_open: boolean
6176   @param force_open: this parameter will be passes to the
6177       L{backend.BlockdevCreate} function where it specifies
6178       whether we run on primary or not, and it affects both
6179       the child assembly and the device own Open() execution
6180
6181   """
6182   if device.CreateOnSecondary():
6183     force_create = True
6184
6185   if device.children:
6186     for child in device.children:
6187       _CreateBlockDev(lu, node, instance, child, force_create,
6188                       info, force_open)
6189
6190   if not force_create:
6191     return
6192
6193   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6194
6195
6196 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6197   """Create a single block device on a given node.
6198
6199   This will not recurse over children of the device, so they must be
6200   created in advance.
6201
6202   @param lu: the lu on whose behalf we execute
6203   @param node: the node on which to create the device
6204   @type instance: L{objects.Instance}
6205   @param instance: the instance which owns the device
6206   @type device: L{objects.Disk}
6207   @param device: the device to create
6208   @param info: the extra 'metadata' we should attach to the device
6209       (this will be represented as a LVM tag)
6210   @type force_open: boolean
6211   @param force_open: this parameter will be passes to the
6212       L{backend.BlockdevCreate} function where it specifies
6213       whether we run on primary or not, and it affects both
6214       the child assembly and the device own Open() execution
6215
6216   """
6217   lu.cfg.SetDiskID(device, node)
6218   result = lu.rpc.call_blockdev_create(node, device, device.size,
6219                                        instance.name, force_open, info)
6220   result.Raise("Can't create block device %s on"
6221                " node %s for instance %s" % (device, node, instance.name))
6222   if device.physical_id is None:
6223     device.physical_id = result.payload
6224
6225
6226 def _GenerateUniqueNames(lu, exts):
6227   """Generate a suitable LV name.
6228
6229   This will generate a logical volume name for the given instance.
6230
6231   """
6232   results = []
6233   for val in exts:
6234     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6235     results.append("%s%s" % (new_id, val))
6236   return results
6237
6238
6239 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6240                          p_minor, s_minor):
6241   """Generate a drbd8 device complete with its children.
6242
6243   """
6244   port = lu.cfg.AllocatePort()
6245   vgname = lu.cfg.GetVGName()
6246   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6247   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6248                           logical_id=(vgname, names[0]))
6249   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6250                           logical_id=(vgname, names[1]))
6251   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6252                           logical_id=(primary, secondary, port,
6253                                       p_minor, s_minor,
6254                                       shared_secret),
6255                           children=[dev_data, dev_meta],
6256                           iv_name=iv_name)
6257   return drbd_dev
6258
6259
6260 def _GenerateDiskTemplate(lu, template_name,
6261                           instance_name, primary_node,
6262                           secondary_nodes, disk_info,
6263                           file_storage_dir, file_driver,
6264                           base_index):
6265   """Generate the entire disk layout for a given template type.
6266
6267   """
6268   #TODO: compute space requirements
6269
6270   vgname = lu.cfg.GetVGName()
6271   disk_count = len(disk_info)
6272   disks = []
6273   if template_name == constants.DT_DISKLESS:
6274     pass
6275   elif template_name == constants.DT_PLAIN:
6276     if len(secondary_nodes) != 0:
6277       raise errors.ProgrammerError("Wrong template configuration")
6278
6279     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6280                                       for i in range(disk_count)])
6281     for idx, disk in enumerate(disk_info):
6282       disk_index = idx + base_index
6283       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6284                               logical_id=(vgname, names[idx]),
6285                               iv_name="disk/%d" % disk_index,
6286                               mode=disk["mode"])
6287       disks.append(disk_dev)
6288   elif template_name == constants.DT_DRBD8:
6289     if len(secondary_nodes) != 1:
6290       raise errors.ProgrammerError("Wrong template configuration")
6291     remote_node = secondary_nodes[0]
6292     minors = lu.cfg.AllocateDRBDMinor(
6293       [primary_node, remote_node] * len(disk_info), instance_name)
6294
6295     names = []
6296     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6297                                                for i in range(disk_count)]):
6298       names.append(lv_prefix + "_data")
6299       names.append(lv_prefix + "_meta")
6300     for idx, disk in enumerate(disk_info):
6301       disk_index = idx + base_index
6302       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6303                                       disk["size"], names[idx*2:idx*2+2],
6304                                       "disk/%d" % disk_index,
6305                                       minors[idx*2], minors[idx*2+1])
6306       disk_dev.mode = disk["mode"]
6307       disks.append(disk_dev)
6308   elif template_name == constants.DT_FILE:
6309     if len(secondary_nodes) != 0:
6310       raise errors.ProgrammerError("Wrong template configuration")
6311
6312     _RequireFileStorage()
6313
6314     for idx, disk in enumerate(disk_info):
6315       disk_index = idx + base_index
6316       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6317                               iv_name="disk/%d" % disk_index,
6318                               logical_id=(file_driver,
6319                                           "%s/disk%d" % (file_storage_dir,
6320                                                          disk_index)),
6321                               mode=disk["mode"])
6322       disks.append(disk_dev)
6323   else:
6324     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6325   return disks
6326
6327
6328 def _GetInstanceInfoText(instance):
6329   """Compute that text that should be added to the disk's metadata.
6330
6331   """
6332   return "originstname+%s" % instance.name
6333
6334
6335 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6336   """Create all disks for an instance.
6337
6338   This abstracts away some work from AddInstance.
6339
6340   @type lu: L{LogicalUnit}
6341   @param lu: the logical unit on whose behalf we execute
6342   @type instance: L{objects.Instance}
6343   @param instance: the instance whose disks we should create
6344   @type to_skip: list
6345   @param to_skip: list of indices to skip
6346   @type target_node: string
6347   @param target_node: if passed, overrides the target node for creation
6348   @rtype: boolean
6349   @return: the success of the creation
6350
6351   """
6352   info = _GetInstanceInfoText(instance)
6353   if target_node is None:
6354     pnode = instance.primary_node
6355     all_nodes = instance.all_nodes
6356   else:
6357     pnode = target_node
6358     all_nodes = [pnode]
6359
6360   if instance.disk_template == constants.DT_FILE:
6361     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6362     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6363
6364     result.Raise("Failed to create directory '%s' on"
6365                  " node %s" % (file_storage_dir, pnode))
6366
6367   # Note: this needs to be kept in sync with adding of disks in
6368   # LUSetInstanceParams
6369   for idx, device in enumerate(instance.disks):
6370     if to_skip and idx in to_skip:
6371       continue
6372     logging.info("Creating volume %s for instance %s",
6373                  device.iv_name, instance.name)
6374     #HARDCODE
6375     for node in all_nodes:
6376       f_create = node == pnode
6377       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6378
6379
6380 def _RemoveDisks(lu, instance, target_node=None):
6381   """Remove all disks for an instance.
6382
6383   This abstracts away some work from `AddInstance()` and
6384   `RemoveInstance()`. Note that in case some of the devices couldn't
6385   be removed, the removal will continue with the other ones (compare
6386   with `_CreateDisks()`).
6387
6388   @type lu: L{LogicalUnit}
6389   @param lu: the logical unit on whose behalf we execute
6390   @type instance: L{objects.Instance}
6391   @param instance: the instance whose disks we should remove
6392   @type target_node: string
6393   @param target_node: used to override the node on which to remove the disks
6394   @rtype: boolean
6395   @return: the success of the removal
6396
6397   """
6398   logging.info("Removing block devices for instance %s", instance.name)
6399
6400   all_result = True
6401   for device in instance.disks:
6402     if target_node:
6403       edata = [(target_node, device)]
6404     else:
6405       edata = device.ComputeNodeTree(instance.primary_node)
6406     for node, disk in edata:
6407       lu.cfg.SetDiskID(disk, node)
6408       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6409       if msg:
6410         lu.LogWarning("Could not remove block device %s on node %s,"
6411                       " continuing anyway: %s", device.iv_name, node, msg)
6412         all_result = False
6413
6414   if instance.disk_template == constants.DT_FILE:
6415     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6416     if target_node:
6417       tgt = target_node
6418     else:
6419       tgt = instance.primary_node
6420     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6421     if result.fail_msg:
6422       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6423                     file_storage_dir, instance.primary_node, result.fail_msg)
6424       all_result = False
6425
6426   return all_result
6427
6428
6429 def _ComputeDiskSize(disk_template, disks):
6430   """Compute disk size requirements in the volume group
6431
6432   """
6433   # Required free disk space as a function of disk and swap space
6434   req_size_dict = {
6435     constants.DT_DISKLESS: None,
6436     constants.DT_PLAIN: sum(d["size"] for d in disks),
6437     # 128 MB are added for drbd metadata for each disk
6438     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6439     constants.DT_FILE: None,
6440   }
6441
6442   if disk_template not in req_size_dict:
6443     raise errors.ProgrammerError("Disk template '%s' size requirement"
6444                                  " is unknown" %  disk_template)
6445
6446   return req_size_dict[disk_template]
6447
6448
6449 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6450   """Hypervisor parameter validation.
6451
6452   This function abstract the hypervisor parameter validation to be
6453   used in both instance create and instance modify.
6454
6455   @type lu: L{LogicalUnit}
6456   @param lu: the logical unit for which we check
6457   @type nodenames: list
6458   @param nodenames: the list of nodes on which we should check
6459   @type hvname: string
6460   @param hvname: the name of the hypervisor we should use
6461   @type hvparams: dict
6462   @param hvparams: the parameters which we need to check
6463   @raise errors.OpPrereqError: if the parameters are not valid
6464
6465   """
6466   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6467                                                   hvname,
6468                                                   hvparams)
6469   for node in nodenames:
6470     info = hvinfo[node]
6471     if info.offline:
6472       continue
6473     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6474
6475
6476 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6477   """OS parameters validation.
6478
6479   @type lu: L{LogicalUnit}
6480   @param lu: the logical unit for which we check
6481   @type required: boolean
6482   @param required: whether the validation should fail if the OS is not
6483       found
6484   @type nodenames: list
6485   @param nodenames: the list of nodes on which we should check
6486   @type osname: string
6487   @param osname: the name of the hypervisor we should use
6488   @type osparams: dict
6489   @param osparams: the parameters which we need to check
6490   @raise errors.OpPrereqError: if the parameters are not valid
6491
6492   """
6493   result = lu.rpc.call_os_validate(required, nodenames, osname,
6494                                    [constants.OS_VALIDATE_PARAMETERS],
6495                                    osparams)
6496   for node, nres in result.items():
6497     # we don't check for offline cases since this should be run only
6498     # against the master node and/or an instance's nodes
6499     nres.Raise("OS Parameters validation failed on node %s" % node)
6500     if not nres.payload:
6501       lu.LogInfo("OS %s not found on node %s, validation skipped",
6502                  osname, node)
6503
6504
6505 class LUCreateInstance(LogicalUnit):
6506   """Create an instance.
6507
6508   """
6509   HPATH = "instance-add"
6510   HTYPE = constants.HTYPE_INSTANCE
6511   _OP_PARAMS = [
6512     _PInstanceName,
6513     ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6514     ("start", True, _TBool),
6515     ("wait_for_sync", True, _TBool),
6516     ("ip_check", True, _TBool),
6517     ("name_check", True, _TBool),
6518     ("disks", _NoDefault, _TListOf(_TDict)),
6519     ("nics", _NoDefault, _TListOf(_TDict)),
6520     ("hvparams", _EmptyDict, _TDict),
6521     ("beparams", _EmptyDict, _TDict),
6522     ("osparams", _EmptyDict, _TDict),
6523     ("no_install", None, _TMaybeBool),
6524     ("os_type", None, _TMaybeString),
6525     ("force_variant", False, _TBool),
6526     ("source_handshake", None, _TOr(_TList, _TNone)),
6527     ("source_x509_ca", None, _TMaybeString),
6528     ("source_instance_name", None, _TMaybeString),
6529     ("src_node", None, _TMaybeString),
6530     ("src_path", None, _TMaybeString),
6531     ("pnode", None, _TMaybeString),
6532     ("snode", None, _TMaybeString),
6533     ("iallocator", None, _TMaybeString),
6534     ("hypervisor", None, _TMaybeString),
6535     ("disk_template", _NoDefault, _CheckDiskTemplate),
6536     ("identify_defaults", False, _TBool),
6537     ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6538     ("file_storage_dir", None, _TMaybeString),
6539     ("dry_run", False, _TBool),
6540     ]
6541   REQ_BGL = False
6542
6543   def CheckArguments(self):
6544     """Check arguments.
6545
6546     """
6547     # do not require name_check to ease forward/backward compatibility
6548     # for tools
6549     if self.op.no_install and self.op.start:
6550       self.LogInfo("No-installation mode selected, disabling startup")
6551       self.op.start = False
6552     # validate/normalize the instance name
6553     self.op.instance_name = \
6554       netutils.HostInfo.NormalizeName(self.op.instance_name)
6555
6556     if self.op.ip_check and not self.op.name_check:
6557       # TODO: make the ip check more flexible and not depend on the name check
6558       raise errors.OpPrereqError("Cannot do ip check without a name check",
6559                                  errors.ECODE_INVAL)
6560
6561     # check nics' parameter names
6562     for nic in self.op.nics:
6563       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6564
6565     # check disks. parameter names and consistent adopt/no-adopt strategy
6566     has_adopt = has_no_adopt = False
6567     for disk in self.op.disks:
6568       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6569       if "adopt" in disk:
6570         has_adopt = True
6571       else:
6572         has_no_adopt = True
6573     if has_adopt and has_no_adopt:
6574       raise errors.OpPrereqError("Either all disks are adopted or none is",
6575                                  errors.ECODE_INVAL)
6576     if has_adopt:
6577       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6578         raise errors.OpPrereqError("Disk adoption is not supported for the"
6579                                    " '%s' disk template" %
6580                                    self.op.disk_template,
6581                                    errors.ECODE_INVAL)
6582       if self.op.iallocator is not None:
6583         raise errors.OpPrereqError("Disk adoption not allowed with an"
6584                                    " iallocator script", errors.ECODE_INVAL)
6585       if self.op.mode == constants.INSTANCE_IMPORT:
6586         raise errors.OpPrereqError("Disk adoption not allowed for"
6587                                    " instance import", errors.ECODE_INVAL)
6588
6589     self.adopt_disks = has_adopt
6590
6591     # instance name verification
6592     if self.op.name_check:
6593       self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
6594       self.op.instance_name = self.hostname1.name
6595       # used in CheckPrereq for ip ping check
6596       self.check_ip = self.hostname1.ip
6597     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6598       raise errors.OpPrereqError("Remote imports require names to be checked" %
6599                                  errors.ECODE_INVAL)
6600     else:
6601       self.check_ip = None
6602
6603     # file storage checks
6604     if (self.op.file_driver and
6605         not self.op.file_driver in constants.FILE_DRIVER):
6606       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6607                                  self.op.file_driver, errors.ECODE_INVAL)
6608
6609     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6610       raise errors.OpPrereqError("File storage directory path not absolute",
6611                                  errors.ECODE_INVAL)
6612
6613     ### Node/iallocator related checks
6614     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6615
6616     self._cds = _GetClusterDomainSecret()
6617
6618     if self.op.mode == constants.INSTANCE_IMPORT:
6619       # On import force_variant must be True, because if we forced it at
6620       # initial install, our only chance when importing it back is that it
6621       # works again!
6622       self.op.force_variant = True
6623
6624       if self.op.no_install:
6625         self.LogInfo("No-installation mode has no effect during import")
6626
6627     elif self.op.mode == constants.INSTANCE_CREATE:
6628       if self.op.os_type is None:
6629         raise errors.OpPrereqError("No guest OS specified",
6630                                    errors.ECODE_INVAL)
6631       if self.op.disk_template is None:
6632         raise errors.OpPrereqError("No disk template specified",
6633                                    errors.ECODE_INVAL)
6634
6635     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6636       # Check handshake to ensure both clusters have the same domain secret
6637       src_handshake = self.op.source_handshake
6638       if not src_handshake:
6639         raise errors.OpPrereqError("Missing source handshake",
6640                                    errors.ECODE_INVAL)
6641
6642       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6643                                                            src_handshake)
6644       if errmsg:
6645         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6646                                    errors.ECODE_INVAL)
6647
6648       # Load and check source CA
6649       self.source_x509_ca_pem = self.op.source_x509_ca
6650       if not self.source_x509_ca_pem:
6651         raise errors.OpPrereqError("Missing source X509 CA",
6652                                    errors.ECODE_INVAL)
6653
6654       try:
6655         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6656                                                     self._cds)
6657       except OpenSSL.crypto.Error, err:
6658         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6659                                    (err, ), errors.ECODE_INVAL)
6660
6661       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6662       if errcode is not None:
6663         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6664                                    errors.ECODE_INVAL)
6665
6666       self.source_x509_ca = cert
6667
6668       src_instance_name = self.op.source_instance_name
6669       if not src_instance_name:
6670         raise errors.OpPrereqError("Missing source instance name",
6671                                    errors.ECODE_INVAL)
6672
6673       norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
6674       self.source_instance_name = netutils.GetHostInfo(norm_name).name
6675
6676     else:
6677       raise errors.OpPrereqError("Invalid instance creation mode %r" %
6678                                  self.op.mode, errors.ECODE_INVAL)
6679
6680   def ExpandNames(self):
6681     """ExpandNames for CreateInstance.
6682
6683     Figure out the right locks for instance creation.
6684
6685     """
6686     self.needed_locks = {}
6687
6688     instance_name = self.op.instance_name
6689     # this is just a preventive check, but someone might still add this
6690     # instance in the meantime, and creation will fail at lock-add time
6691     if instance_name in self.cfg.GetInstanceList():
6692       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6693                                  instance_name, errors.ECODE_EXISTS)
6694
6695     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6696
6697     if self.op.iallocator:
6698       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6699     else:
6700       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6701       nodelist = [self.op.pnode]
6702       if self.op.snode is not None:
6703         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6704         nodelist.append(self.op.snode)
6705       self.needed_locks[locking.LEVEL_NODE] = nodelist
6706
6707     # in case of import lock the source node too
6708     if self.op.mode == constants.INSTANCE_IMPORT:
6709       src_node = self.op.src_node
6710       src_path = self.op.src_path
6711
6712       if src_path is None:
6713         self.op.src_path = src_path = self.op.instance_name
6714
6715       if src_node is None:
6716         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6717         self.op.src_node = None
6718         if os.path.isabs(src_path):
6719           raise errors.OpPrereqError("Importing an instance from an absolute"
6720                                      " path requires a source node option.",
6721                                      errors.ECODE_INVAL)
6722       else:
6723         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6724         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6725           self.needed_locks[locking.LEVEL_NODE].append(src_node)
6726         if not os.path.isabs(src_path):
6727           self.op.src_path = src_path = \
6728             utils.PathJoin(constants.EXPORT_DIR, src_path)
6729
6730   def _RunAllocator(self):
6731     """Run the allocator based on input opcode.
6732
6733     """
6734     nics = [n.ToDict() for n in self.nics]
6735     ial = IAllocator(self.cfg, self.rpc,
6736                      mode=constants.IALLOCATOR_MODE_ALLOC,
6737                      name=self.op.instance_name,
6738                      disk_template=self.op.disk_template,
6739                      tags=[],
6740                      os=self.op.os_type,
6741                      vcpus=self.be_full[constants.BE_VCPUS],
6742                      mem_size=self.be_full[constants.BE_MEMORY],
6743                      disks=self.disks,
6744                      nics=nics,
6745                      hypervisor=self.op.hypervisor,
6746                      )
6747
6748     ial.Run(self.op.iallocator)
6749
6750     if not ial.success:
6751       raise errors.OpPrereqError("Can't compute nodes using"
6752                                  " iallocator '%s': %s" %
6753                                  (self.op.iallocator, ial.info),
6754                                  errors.ECODE_NORES)
6755     if len(ial.result) != ial.required_nodes:
6756       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6757                                  " of nodes (%s), required %s" %
6758                                  (self.op.iallocator, len(ial.result),
6759                                   ial.required_nodes), errors.ECODE_FAULT)
6760     self.op.pnode = ial.result[0]
6761     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6762                  self.op.instance_name, self.op.iallocator,
6763                  utils.CommaJoin(ial.result))
6764     if ial.required_nodes == 2:
6765       self.op.snode = ial.result[1]
6766
6767   def BuildHooksEnv(self):
6768     """Build hooks env.
6769
6770     This runs on master, primary and secondary nodes of the instance.
6771
6772     """
6773     env = {
6774       "ADD_MODE": self.op.mode,
6775       }
6776     if self.op.mode == constants.INSTANCE_IMPORT:
6777       env["SRC_NODE"] = self.op.src_node
6778       env["SRC_PATH"] = self.op.src_path
6779       env["SRC_IMAGES"] = self.src_images
6780
6781     env.update(_BuildInstanceHookEnv(
6782       name=self.op.instance_name,
6783       primary_node=self.op.pnode,
6784       secondary_nodes=self.secondaries,
6785       status=self.op.start,
6786       os_type=self.op.os_type,
6787       memory=self.be_full[constants.BE_MEMORY],
6788       vcpus=self.be_full[constants.BE_VCPUS],
6789       nics=_NICListToTuple(self, self.nics),
6790       disk_template=self.op.disk_template,
6791       disks=[(d["size"], d["mode"]) for d in self.disks],
6792       bep=self.be_full,
6793       hvp=self.hv_full,
6794       hypervisor_name=self.op.hypervisor,
6795     ))
6796
6797     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6798           self.secondaries)
6799     return env, nl, nl
6800
6801   def _ReadExportInfo(self):
6802     """Reads the export information from disk.
6803
6804     It will override the opcode source node and path with the actual
6805     information, if these two were not specified before.
6806
6807     @return: the export information
6808
6809     """
6810     assert self.op.mode == constants.INSTANCE_IMPORT
6811
6812     src_node = self.op.src_node
6813     src_path = self.op.src_path
6814
6815     if src_node is None:
6816       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6817       exp_list = self.rpc.call_export_list(locked_nodes)
6818       found = False
6819       for node in exp_list:
6820         if exp_list[node].fail_msg:
6821           continue
6822         if src_path in exp_list[node].payload:
6823           found = True
6824           self.op.src_node = src_node = node
6825           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6826                                                        src_path)
6827           break
6828       if not found:
6829         raise errors.OpPrereqError("No export found for relative path %s" %
6830                                     src_path, errors.ECODE_INVAL)
6831
6832     _CheckNodeOnline(self, src_node)
6833     result = self.rpc.call_export_info(src_node, src_path)
6834     result.Raise("No export or invalid export found in dir %s" % src_path)
6835
6836     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6837     if not export_info.has_section(constants.INISECT_EXP):
6838       raise errors.ProgrammerError("Corrupted export config",
6839                                    errors.ECODE_ENVIRON)
6840
6841     ei_version = export_info.get(constants.INISECT_EXP, "version")
6842     if (int(ei_version) != constants.EXPORT_VERSION):
6843       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6844                                  (ei_version, constants.EXPORT_VERSION),
6845                                  errors.ECODE_ENVIRON)
6846     return export_info
6847
6848   def _ReadExportParams(self, einfo):
6849     """Use export parameters as defaults.
6850
6851     In case the opcode doesn't specify (as in override) some instance
6852     parameters, then try to use them from the export information, if
6853     that declares them.
6854
6855     """
6856     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6857
6858     if self.op.disk_template is None:
6859       if einfo.has_option(constants.INISECT_INS, "disk_template"):
6860         self.op.disk_template = einfo.get(constants.INISECT_INS,
6861                                           "disk_template")
6862       else:
6863         raise errors.OpPrereqError("No disk template specified and the export"
6864                                    " is missing the disk_template information",
6865                                    errors.ECODE_INVAL)
6866
6867     if not self.op.disks:
6868       if einfo.has_option(constants.INISECT_INS, "disk_count"):
6869         disks = []
6870         # TODO: import the disk iv_name too
6871         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6872           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6873           disks.append({"size": disk_sz})
6874         self.op.disks = disks
6875       else:
6876         raise errors.OpPrereqError("No disk info specified and the export"
6877                                    " is missing the disk information",
6878                                    errors.ECODE_INVAL)
6879
6880     if (not self.op.nics and
6881         einfo.has_option(constants.INISECT_INS, "nic_count")):
6882       nics = []
6883       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6884         ndict = {}
6885         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6886           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6887           ndict[name] = v
6888         nics.append(ndict)
6889       self.op.nics = nics
6890
6891     if (self.op.hypervisor is None and
6892         einfo.has_option(constants.INISECT_INS, "hypervisor")):
6893       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6894     if einfo.has_section(constants.INISECT_HYP):
6895       # use the export parameters but do not override the ones
6896       # specified by the user
6897       for name, value in einfo.items(constants.INISECT_HYP):
6898         if name not in self.op.hvparams:
6899           self.op.hvparams[name] = value
6900
6901     if einfo.has_section(constants.INISECT_BEP):
6902       # use the parameters, without overriding
6903       for name, value in einfo.items(constants.INISECT_BEP):
6904         if name not in self.op.beparams:
6905           self.op.beparams[name] = value
6906     else:
6907       # try to read the parameters old style, from the main section
6908       for name in constants.BES_PARAMETERS:
6909         if (name not in self.op.beparams and
6910             einfo.has_option(constants.INISECT_INS, name)):
6911           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6912
6913     if einfo.has_section(constants.INISECT_OSP):
6914       # use the parameters, without overriding
6915       for name, value in einfo.items(constants.INISECT_OSP):
6916         if name not in self.op.osparams:
6917           self.op.osparams[name] = value
6918
6919   def _RevertToDefaults(self, cluster):
6920     """Revert the instance parameters to the default values.
6921
6922     """
6923     # hvparams
6924     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6925     for name in self.op.hvparams.keys():
6926       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6927         del self.op.hvparams[name]
6928     # beparams
6929     be_defs = cluster.SimpleFillBE({})
6930     for name in self.op.beparams.keys():
6931       if name in be_defs and be_defs[name] == self.op.beparams[name]:
6932         del self.op.beparams[name]
6933     # nic params
6934     nic_defs = cluster.SimpleFillNIC({})
6935     for nic in self.op.nics:
6936       for name in constants.NICS_PARAMETERS:
6937         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6938           del nic[name]
6939     # osparams
6940     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6941     for name in self.op.osparams.keys():
6942       if name in os_defs and os_defs[name] == self.op.osparams[name]:
6943         del self.op.osparams[name]
6944
6945   def CheckPrereq(self):
6946     """Check prerequisites.
6947
6948     """
6949     if self.op.mode == constants.INSTANCE_IMPORT:
6950       export_info = self._ReadExportInfo()
6951       self._ReadExportParams(export_info)
6952
6953     _CheckDiskTemplate(self.op.disk_template)
6954
6955     if (not self.cfg.GetVGName() and
6956         self.op.disk_template not in constants.DTS_NOT_LVM):
6957       raise errors.OpPrereqError("Cluster does not support lvm-based"
6958                                  " instances", errors.ECODE_STATE)
6959
6960     if self.op.hypervisor is None:
6961       self.op.hypervisor = self.cfg.GetHypervisorType()
6962
6963     cluster = self.cfg.GetClusterInfo()
6964     enabled_hvs = cluster.enabled_hypervisors
6965     if self.op.hypervisor not in enabled_hvs:
6966       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6967                                  " cluster (%s)" % (self.op.hypervisor,
6968                                   ",".join(enabled_hvs)),
6969                                  errors.ECODE_STATE)
6970
6971     # check hypervisor parameter syntax (locally)
6972     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6973     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6974                                       self.op.hvparams)
6975     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6976     hv_type.CheckParameterSyntax(filled_hvp)
6977     self.hv_full = filled_hvp
6978     # check that we don't specify global parameters on an instance
6979     _CheckGlobalHvParams(self.op.hvparams)
6980
6981     # fill and remember the beparams dict
6982     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6983     self.be_full = cluster.SimpleFillBE(self.op.beparams)
6984
6985     # build os parameters
6986     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6987
6988     # now that hvp/bep are in final format, let's reset to defaults,
6989     # if told to do so
6990     if self.op.identify_defaults:
6991       self._RevertToDefaults(cluster)
6992
6993     # NIC buildup
6994     self.nics = []
6995     for idx, nic in enumerate(self.op.nics):
6996       nic_mode_req = nic.get("mode", None)
6997       nic_mode = nic_mode_req
6998       if nic_mode is None:
6999         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7000
7001       # in routed mode, for the first nic, the default ip is 'auto'
7002       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7003         default_ip_mode = constants.VALUE_AUTO
7004       else:
7005         default_ip_mode = constants.VALUE_NONE
7006
7007       # ip validity checks
7008       ip = nic.get("ip", default_ip_mode)
7009       if ip is None or ip.lower() == constants.VALUE_NONE:
7010         nic_ip = None
7011       elif ip.lower() == constants.VALUE_AUTO:
7012         if not self.op.name_check:
7013           raise errors.OpPrereqError("IP address set to auto but name checks"
7014                                      " have been skipped. Aborting.",
7015                                      errors.ECODE_INVAL)
7016         nic_ip = self.hostname1.ip
7017       else:
7018         if not netutils.IP4Address.IsValid(ip):
7019           raise errors.OpPrereqError("Given IP address '%s' doesn't look"
7020                                      " like a valid IP" % ip,
7021                                      errors.ECODE_INVAL)
7022         nic_ip = ip
7023
7024       # TODO: check the ip address for uniqueness
7025       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7026         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7027                                    errors.ECODE_INVAL)
7028
7029       # MAC address verification
7030       mac = nic.get("mac", constants.VALUE_AUTO)
7031       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7032         mac = utils.NormalizeAndValidateMac(mac)
7033
7034         try:
7035           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7036         except errors.ReservationError:
7037           raise errors.OpPrereqError("MAC address %s already in use"
7038                                      " in cluster" % mac,
7039                                      errors.ECODE_NOTUNIQUE)
7040
7041       # bridge verification
7042       bridge = nic.get("bridge", None)
7043       link = nic.get("link", None)
7044       if bridge and link:
7045         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7046                                    " at the same time", errors.ECODE_INVAL)
7047       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7048         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7049                                    errors.ECODE_INVAL)
7050       elif bridge:
7051         link = bridge
7052
7053       nicparams = {}
7054       if nic_mode_req:
7055         nicparams[constants.NIC_MODE] = nic_mode_req
7056       if link:
7057         nicparams[constants.NIC_LINK] = link
7058
7059       check_params = cluster.SimpleFillNIC(nicparams)
7060       objects.NIC.CheckParameterSyntax(check_params)
7061       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7062
7063     # disk checks/pre-build
7064     self.disks = []
7065     for disk in self.op.disks:
7066       mode = disk.get("mode", constants.DISK_RDWR)
7067       if mode not in constants.DISK_ACCESS_SET:
7068         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7069                                    mode, errors.ECODE_INVAL)
7070       size = disk.get("size", None)
7071       if size is None:
7072         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7073       try:
7074         size = int(size)
7075       except (TypeError, ValueError):
7076         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7077                                    errors.ECODE_INVAL)
7078       new_disk = {"size": size, "mode": mode}
7079       if "adopt" in disk:
7080         new_disk["adopt"] = disk["adopt"]
7081       self.disks.append(new_disk)
7082
7083     if self.op.mode == constants.INSTANCE_IMPORT:
7084
7085       # Check that the new instance doesn't have less disks than the export
7086       instance_disks = len(self.disks)
7087       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7088       if instance_disks < export_disks:
7089         raise errors.OpPrereqError("Not enough disks to import."
7090                                    " (instance: %d, export: %d)" %
7091                                    (instance_disks, export_disks),
7092                                    errors.ECODE_INVAL)
7093
7094       disk_images = []
7095       for idx in range(export_disks):
7096         option = 'disk%d_dump' % idx
7097         if export_info.has_option(constants.INISECT_INS, option):
7098           # FIXME: are the old os-es, disk sizes, etc. useful?
7099           export_name = export_info.get(constants.INISECT_INS, option)
7100           image = utils.PathJoin(self.op.src_path, export_name)
7101           disk_images.append(image)
7102         else:
7103           disk_images.append(False)
7104
7105       self.src_images = disk_images
7106
7107       old_name = export_info.get(constants.INISECT_INS, 'name')
7108       try:
7109         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7110       except (TypeError, ValueError), err:
7111         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7112                                    " an integer: %s" % str(err),
7113                                    errors.ECODE_STATE)
7114       if self.op.instance_name == old_name:
7115         for idx, nic in enumerate(self.nics):
7116           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7117             nic_mac_ini = 'nic%d_mac' % idx
7118             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7119
7120     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7121
7122     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7123     if self.op.ip_check:
7124       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7125         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7126                                    (self.check_ip, self.op.instance_name),
7127                                    errors.ECODE_NOTUNIQUE)
7128
7129     #### mac address generation
7130     # By generating here the mac address both the allocator and the hooks get
7131     # the real final mac address rather than the 'auto' or 'generate' value.
7132     # There is a race condition between the generation and the instance object
7133     # creation, which means that we know the mac is valid now, but we're not
7134     # sure it will be when we actually add the instance. If things go bad
7135     # adding the instance will abort because of a duplicate mac, and the
7136     # creation job will fail.
7137     for nic in self.nics:
7138       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7139         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7140
7141     #### allocator run
7142
7143     if self.op.iallocator is not None:
7144       self._RunAllocator()
7145
7146     #### node related checks
7147
7148     # check primary node
7149     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7150     assert self.pnode is not None, \
7151       "Cannot retrieve locked node %s" % self.op.pnode
7152     if pnode.offline:
7153       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7154                                  pnode.name, errors.ECODE_STATE)
7155     if pnode.drained:
7156       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7157                                  pnode.name, errors.ECODE_STATE)
7158
7159     self.secondaries = []
7160
7161     # mirror node verification
7162     if self.op.disk_template in constants.DTS_NET_MIRROR:
7163       if self.op.snode is None:
7164         raise errors.OpPrereqError("The networked disk templates need"
7165                                    " a mirror node", errors.ECODE_INVAL)
7166       if self.op.snode == pnode.name:
7167         raise errors.OpPrereqError("The secondary node cannot be the"
7168                                    " primary node.", errors.ECODE_INVAL)
7169       _CheckNodeOnline(self, self.op.snode)
7170       _CheckNodeNotDrained(self, self.op.snode)
7171       self.secondaries.append(self.op.snode)
7172
7173     nodenames = [pnode.name] + self.secondaries
7174
7175     req_size = _ComputeDiskSize(self.op.disk_template,
7176                                 self.disks)
7177
7178     # Check lv size requirements, if not adopting
7179     if req_size is not None and not self.adopt_disks:
7180       _CheckNodesFreeDisk(self, nodenames, req_size)
7181
7182     if self.adopt_disks: # instead, we must check the adoption data
7183       all_lvs = set([i["adopt"] for i in self.disks])
7184       if len(all_lvs) != len(self.disks):
7185         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7186                                    errors.ECODE_INVAL)
7187       for lv_name in all_lvs:
7188         try:
7189           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7190         except errors.ReservationError:
7191           raise errors.OpPrereqError("LV named %s used by another instance" %
7192                                      lv_name, errors.ECODE_NOTUNIQUE)
7193
7194       node_lvs = self.rpc.call_lv_list([pnode.name],
7195                                        self.cfg.GetVGName())[pnode.name]
7196       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7197       node_lvs = node_lvs.payload
7198       delta = all_lvs.difference(node_lvs.keys())
7199       if delta:
7200         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7201                                    utils.CommaJoin(delta),
7202                                    errors.ECODE_INVAL)
7203       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7204       if online_lvs:
7205         raise errors.OpPrereqError("Online logical volumes found, cannot"
7206                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7207                                    errors.ECODE_STATE)
7208       # update the size of disk based on what is found
7209       for dsk in self.disks:
7210         dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7211
7212     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7213
7214     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7215     # check OS parameters (remotely)
7216     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7217
7218     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7219
7220     # memory check on primary node
7221     if self.op.start:
7222       _CheckNodeFreeMemory(self, self.pnode.name,
7223                            "creating instance %s" % self.op.instance_name,
7224                            self.be_full[constants.BE_MEMORY],
7225                            self.op.hypervisor)
7226
7227     self.dry_run_result = list(nodenames)
7228
7229   def Exec(self, feedback_fn):
7230     """Create and add the instance to the cluster.
7231
7232     """
7233     instance = self.op.instance_name
7234     pnode_name = self.pnode.name
7235
7236     ht_kind = self.op.hypervisor
7237     if ht_kind in constants.HTS_REQ_PORT:
7238       network_port = self.cfg.AllocatePort()
7239     else:
7240       network_port = None
7241
7242     if constants.ENABLE_FILE_STORAGE:
7243       # this is needed because os.path.join does not accept None arguments
7244       if self.op.file_storage_dir is None:
7245         string_file_storage_dir = ""
7246       else:
7247         string_file_storage_dir = self.op.file_storage_dir
7248
7249       # build the full file storage dir path
7250       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7251                                         string_file_storage_dir, instance)
7252     else:
7253       file_storage_dir = ""
7254
7255     disks = _GenerateDiskTemplate(self,
7256                                   self.op.disk_template,
7257                                   instance, pnode_name,
7258                                   self.secondaries,
7259                                   self.disks,
7260                                   file_storage_dir,
7261                                   self.op.file_driver,
7262                                   0)
7263
7264     iobj = objects.Instance(name=instance, os=self.op.os_type,
7265                             primary_node=pnode_name,
7266                             nics=self.nics, disks=disks,
7267                             disk_template=self.op.disk_template,
7268                             admin_up=False,
7269                             network_port=network_port,
7270                             beparams=self.op.beparams,
7271                             hvparams=self.op.hvparams,
7272                             hypervisor=self.op.hypervisor,
7273                             osparams=self.op.osparams,
7274                             )
7275
7276     if self.adopt_disks:
7277       # rename LVs to the newly-generated names; we need to construct
7278       # 'fake' LV disks with the old data, plus the new unique_id
7279       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7280       rename_to = []
7281       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7282         rename_to.append(t_dsk.logical_id)
7283         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7284         self.cfg.SetDiskID(t_dsk, pnode_name)
7285       result = self.rpc.call_blockdev_rename(pnode_name,
7286                                              zip(tmp_disks, rename_to))
7287       result.Raise("Failed to rename adoped LVs")
7288     else:
7289       feedback_fn("* creating instance disks...")
7290       try:
7291         _CreateDisks(self, iobj)
7292       except errors.OpExecError:
7293         self.LogWarning("Device creation failed, reverting...")
7294         try:
7295           _RemoveDisks(self, iobj)
7296         finally:
7297           self.cfg.ReleaseDRBDMinors(instance)
7298           raise
7299
7300     feedback_fn("adding instance %s to cluster config" % instance)
7301
7302     self.cfg.AddInstance(iobj, self.proc.GetECId())
7303
7304     # Declare that we don't want to remove the instance lock anymore, as we've
7305     # added the instance to the config
7306     del self.remove_locks[locking.LEVEL_INSTANCE]
7307     # Unlock all the nodes
7308     if self.op.mode == constants.INSTANCE_IMPORT:
7309       nodes_keep = [self.op.src_node]
7310       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7311                        if node != self.op.src_node]
7312       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7313       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7314     else:
7315       self.context.glm.release(locking.LEVEL_NODE)
7316       del self.acquired_locks[locking.LEVEL_NODE]
7317
7318     if self.op.wait_for_sync:
7319       disk_abort = not _WaitForSync(self, iobj)
7320     elif iobj.disk_template in constants.DTS_NET_MIRROR:
7321       # make sure the disks are not degraded (still sync-ing is ok)
7322       time.sleep(15)
7323       feedback_fn("* checking mirrors status")
7324       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7325     else:
7326       disk_abort = False
7327
7328     if disk_abort:
7329       _RemoveDisks(self, iobj)
7330       self.cfg.RemoveInstance(iobj.name)
7331       # Make sure the instance lock gets removed
7332       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7333       raise errors.OpExecError("There are some degraded disks for"
7334                                " this instance")
7335
7336     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7337       if self.op.mode == constants.INSTANCE_CREATE:
7338         if not self.op.no_install:
7339           feedback_fn("* running the instance OS create scripts...")
7340           # FIXME: pass debug option from opcode to backend
7341           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7342                                                  self.op.debug_level)
7343           result.Raise("Could not add os for instance %s"
7344                        " on node %s" % (instance, pnode_name))
7345
7346       elif self.op.mode == constants.INSTANCE_IMPORT:
7347         feedback_fn("* running the instance OS import scripts...")
7348
7349         transfers = []
7350
7351         for idx, image in enumerate(self.src_images):
7352           if not image:
7353             continue
7354
7355           # FIXME: pass debug option from opcode to backend
7356           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7357                                              constants.IEIO_FILE, (image, ),
7358                                              constants.IEIO_SCRIPT,
7359                                              (iobj.disks[idx], idx),
7360                                              None)
7361           transfers.append(dt)
7362
7363         import_result = \
7364           masterd.instance.TransferInstanceData(self, feedback_fn,
7365                                                 self.op.src_node, pnode_name,
7366                                                 self.pnode.secondary_ip,
7367                                                 iobj, transfers)
7368         if not compat.all(import_result):
7369           self.LogWarning("Some disks for instance %s on node %s were not"
7370                           " imported successfully" % (instance, pnode_name))
7371
7372       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7373         feedback_fn("* preparing remote import...")
7374         connect_timeout = constants.RIE_CONNECT_TIMEOUT
7375         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7376
7377         disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7378                                                      self.source_x509_ca,
7379                                                      self._cds, timeouts)
7380         if not compat.all(disk_results):
7381           # TODO: Should the instance still be started, even if some disks
7382           # failed to import (valid for local imports, too)?
7383           self.LogWarning("Some disks for instance %s on node %s were not"
7384                           " imported successfully" % (instance, pnode_name))
7385
7386         # Run rename script on newly imported instance
7387         assert iobj.name == instance
7388         feedback_fn("Running rename script for %s" % instance)
7389         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7390                                                    self.source_instance_name,
7391                                                    self.op.debug_level)
7392         if result.fail_msg:
7393           self.LogWarning("Failed to run rename script for %s on node"
7394                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7395
7396       else:
7397         # also checked in the prereq part
7398         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7399                                      % self.op.mode)
7400
7401     if self.op.start:
7402       iobj.admin_up = True
7403       self.cfg.Update(iobj, feedback_fn)
7404       logging.info("Starting instance %s on node %s", instance, pnode_name)
7405       feedback_fn("* starting instance...")
7406       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7407       result.Raise("Could not start instance")
7408
7409     return list(iobj.all_nodes)
7410
7411
7412 class LUConnectConsole(NoHooksLU):
7413   """Connect to an instance's console.
7414
7415   This is somewhat special in that it returns the command line that
7416   you need to run on the master node in order to connect to the
7417   console.
7418
7419   """
7420   _OP_PARAMS = [
7421     _PInstanceName
7422     ]
7423   REQ_BGL = False
7424
7425   def ExpandNames(self):
7426     self._ExpandAndLockInstance()
7427
7428   def CheckPrereq(self):
7429     """Check prerequisites.
7430
7431     This checks that the instance is in the cluster.
7432
7433     """
7434     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7435     assert self.instance is not None, \
7436       "Cannot retrieve locked instance %s" % self.op.instance_name
7437     _CheckNodeOnline(self, self.instance.primary_node)
7438
7439   def Exec(self, feedback_fn):
7440     """Connect to the console of an instance
7441
7442     """
7443     instance = self.instance
7444     node = instance.primary_node
7445
7446     node_insts = self.rpc.call_instance_list([node],
7447                                              [instance.hypervisor])[node]
7448     node_insts.Raise("Can't get node information from %s" % node)
7449
7450     if instance.name not in node_insts.payload:
7451       raise errors.OpExecError("Instance %s is not running." % instance.name)
7452
7453     logging.debug("Connecting to console of %s on %s", instance.name, node)
7454
7455     hyper = hypervisor.GetHypervisor(instance.hypervisor)
7456     cluster = self.cfg.GetClusterInfo()
7457     # beparams and hvparams are passed separately, to avoid editing the
7458     # instance and then saving the defaults in the instance itself.
7459     hvparams = cluster.FillHV(instance)
7460     beparams = cluster.FillBE(instance)
7461     console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7462
7463     # build ssh cmdline
7464     return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7465
7466
7467 class LUReplaceDisks(LogicalUnit):
7468   """Replace the disks of an instance.
7469
7470   """
7471   HPATH = "mirrors-replace"
7472   HTYPE = constants.HTYPE_INSTANCE
7473   _OP_PARAMS = [
7474     _PInstanceName,
7475     ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7476     ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7477     ("remote_node", None, _TMaybeString),
7478     ("iallocator", None, _TMaybeString),
7479     ("early_release", False, _TBool),
7480     ]
7481   REQ_BGL = False
7482
7483   def CheckArguments(self):
7484     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7485                                   self.op.iallocator)
7486
7487   def ExpandNames(self):
7488     self._ExpandAndLockInstance()
7489
7490     if self.op.iallocator is not None:
7491       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7492
7493     elif self.op.remote_node is not None:
7494       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7495       self.op.remote_node = remote_node
7496
7497       # Warning: do not remove the locking of the new secondary here
7498       # unless DRBD8.AddChildren is changed to work in parallel;
7499       # currently it doesn't since parallel invocations of
7500       # FindUnusedMinor will conflict
7501       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7502       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7503
7504     else:
7505       self.needed_locks[locking.LEVEL_NODE] = []
7506       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7507
7508     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7509                                    self.op.iallocator, self.op.remote_node,
7510                                    self.op.disks, False, self.op.early_release)
7511
7512     self.tasklets = [self.replacer]
7513
7514   def DeclareLocks(self, level):
7515     # If we're not already locking all nodes in the set we have to declare the
7516     # instance's primary/secondary nodes.
7517     if (level == locking.LEVEL_NODE and
7518         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7519       self._LockInstancesNodes()
7520
7521   def BuildHooksEnv(self):
7522     """Build hooks env.
7523
7524     This runs on the master, the primary and all the secondaries.
7525
7526     """
7527     instance = self.replacer.instance
7528     env = {
7529       "MODE": self.op.mode,
7530       "NEW_SECONDARY": self.op.remote_node,
7531       "OLD_SECONDARY": instance.secondary_nodes[0],
7532       }
7533     env.update(_BuildInstanceHookEnvByObject(self, instance))
7534     nl = [
7535       self.cfg.GetMasterNode(),
7536       instance.primary_node,
7537       ]
7538     if self.op.remote_node is not None:
7539       nl.append(self.op.remote_node)
7540     return env, nl, nl
7541
7542
7543 class TLReplaceDisks(Tasklet):
7544   """Replaces disks for an instance.
7545
7546   Note: Locking is not within the scope of this class.
7547
7548   """
7549   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7550                disks, delay_iallocator, early_release):
7551     """Initializes this class.
7552
7553     """
7554     Tasklet.__init__(self, lu)
7555
7556     # Parameters
7557     self.instance_name = instance_name
7558     self.mode = mode
7559     self.iallocator_name = iallocator_name
7560     self.remote_node = remote_node
7561     self.disks = disks
7562     self.delay_iallocator = delay_iallocator
7563     self.early_release = early_release
7564
7565     # Runtime data
7566     self.instance = None
7567     self.new_node = None
7568     self.target_node = None
7569     self.other_node = None
7570     self.remote_node_info = None
7571     self.node_secondary_ip = None
7572
7573   @staticmethod
7574   def CheckArguments(mode, remote_node, iallocator):
7575     """Helper function for users of this class.
7576
7577     """
7578     # check for valid parameter combination
7579     if mode == constants.REPLACE_DISK_CHG:
7580       if remote_node is None and iallocator is None:
7581         raise errors.OpPrereqError("When changing the secondary either an"
7582                                    " iallocator script must be used or the"
7583                                    " new node given", errors.ECODE_INVAL)
7584
7585       if remote_node is not None and iallocator is not None:
7586         raise errors.OpPrereqError("Give either the iallocator or the new"
7587                                    " secondary, not both", errors.ECODE_INVAL)
7588
7589     elif remote_node is not None or iallocator is not None:
7590       # Not replacing the secondary
7591       raise errors.OpPrereqError("The iallocator and new node options can"
7592                                  " only be used when changing the"
7593                                  " secondary node", errors.ECODE_INVAL)
7594
7595   @staticmethod
7596   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7597     """Compute a new secondary node using an IAllocator.
7598
7599     """
7600     ial = IAllocator(lu.cfg, lu.rpc,
7601                      mode=constants.IALLOCATOR_MODE_RELOC,
7602                      name=instance_name,
7603                      relocate_from=relocate_from)
7604
7605     ial.Run(iallocator_name)
7606
7607     if not ial.success:
7608       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7609                                  " %s" % (iallocator_name, ial.info),
7610                                  errors.ECODE_NORES)
7611
7612     if len(ial.result) != ial.required_nodes:
7613       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7614                                  " of nodes (%s), required %s" %
7615                                  (iallocator_name,
7616                                   len(ial.result), ial.required_nodes),
7617                                  errors.ECODE_FAULT)
7618
7619     remote_node_name = ial.result[0]
7620
7621     lu.LogInfo("Selected new secondary for instance '%s': %s",
7622                instance_name, remote_node_name)
7623
7624     return remote_node_name
7625
7626   def _FindFaultyDisks(self, node_name):
7627     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7628                                     node_name, True)
7629
7630   def CheckPrereq(self):
7631     """Check prerequisites.
7632
7633     This checks that the instance is in the cluster.
7634
7635     """
7636     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7637     assert instance is not None, \
7638       "Cannot retrieve locked instance %s" % self.instance_name
7639
7640     if instance.disk_template != constants.DT_DRBD8:
7641       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7642                                  " instances", errors.ECODE_INVAL)
7643
7644     if len(instance.secondary_nodes) != 1:
7645       raise errors.OpPrereqError("The instance has a strange layout,"
7646                                  " expected one secondary but found %d" %
7647                                  len(instance.secondary_nodes),
7648                                  errors.ECODE_FAULT)
7649
7650     if not self.delay_iallocator:
7651       self._CheckPrereq2()
7652
7653   def _CheckPrereq2(self):
7654     """Check prerequisites, second part.
7655
7656     This function should always be part of CheckPrereq. It was separated and is
7657     now called from Exec because during node evacuation iallocator was only
7658     called with an unmodified cluster model, not taking planned changes into
7659     account.
7660
7661     """
7662     instance = self.instance
7663     secondary_node = instance.secondary_nodes[0]
7664
7665     if self.iallocator_name is None:
7666       remote_node = self.remote_node
7667     else:
7668       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7669                                        instance.name, instance.secondary_nodes)
7670
7671     if remote_node is not None:
7672       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7673       assert self.remote_node_info is not None, \
7674         "Cannot retrieve locked node %s" % remote_node
7675     else:
7676       self.remote_node_info = None
7677
7678     if remote_node == self.instance.primary_node:
7679       raise errors.OpPrereqError("The specified node is the primary node of"
7680                                  " the instance.", errors.ECODE_INVAL)
7681
7682     if remote_node == secondary_node:
7683       raise errors.OpPrereqError("The specified node is already the"
7684                                  " secondary node of the instance.",
7685                                  errors.ECODE_INVAL)
7686
7687     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7688                                     constants.REPLACE_DISK_CHG):
7689       raise errors.OpPrereqError("Cannot specify disks to be replaced",
7690                                  errors.ECODE_INVAL)
7691
7692     if self.mode == constants.REPLACE_DISK_AUTO:
7693       faulty_primary = self._FindFaultyDisks(instance.primary_node)
7694       faulty_secondary = self._FindFaultyDisks(secondary_node)
7695
7696       if faulty_primary and faulty_secondary:
7697         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7698                                    " one node and can not be repaired"
7699                                    " automatically" % self.instance_name,
7700                                    errors.ECODE_STATE)
7701
7702       if faulty_primary:
7703         self.disks = faulty_primary
7704         self.target_node = instance.primary_node
7705         self.other_node = secondary_node
7706         check_nodes = [self.target_node, self.other_node]
7707       elif faulty_secondary:
7708         self.disks = faulty_secondary
7709         self.target_node = secondary_node
7710         self.other_node = instance.primary_node
7711         check_nodes = [self.target_node, self.other_node]
7712       else:
7713         self.disks = []
7714         check_nodes = []
7715
7716     else:
7717       # Non-automatic modes
7718       if self.mode == constants.REPLACE_DISK_PRI:
7719         self.target_node = instance.primary_node
7720         self.other_node = secondary_node
7721         check_nodes = [self.target_node, self.other_node]
7722
7723       elif self.mode == constants.REPLACE_DISK_SEC:
7724         self.target_node = secondary_node
7725         self.other_node = instance.primary_node
7726         check_nodes = [self.target_node, self.other_node]
7727
7728       elif self.mode == constants.REPLACE_DISK_CHG:
7729         self.new_node = remote_node
7730         self.other_node = instance.primary_node
7731         self.target_node = secondary_node
7732         check_nodes = [self.new_node, self.other_node]
7733
7734         _CheckNodeNotDrained(self.lu, remote_node)
7735
7736         old_node_info = self.cfg.GetNodeInfo(secondary_node)
7737         assert old_node_info is not None
7738         if old_node_info.offline and not self.early_release:
7739           # doesn't make sense to delay the release
7740           self.early_release = True
7741           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7742                           " early-release mode", secondary_node)
7743
7744       else:
7745         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7746                                      self.mode)
7747
7748       # If not specified all disks should be replaced
7749       if not self.disks:
7750         self.disks = range(len(self.instance.disks))
7751
7752     for node in check_nodes:
7753       _CheckNodeOnline(self.lu, node)
7754
7755     # Check whether disks are valid
7756     for disk_idx in self.disks:
7757       instance.FindDisk(disk_idx)
7758
7759     # Get secondary node IP addresses
7760     node_2nd_ip = {}
7761
7762     for node_name in [self.target_node, self.other_node, self.new_node]:
7763       if node_name is not None:
7764         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7765
7766     self.node_secondary_ip = node_2nd_ip
7767
7768   def Exec(self, feedback_fn):
7769     """Execute disk replacement.
7770
7771     This dispatches the disk replacement to the appropriate handler.
7772
7773     """
7774     if self.delay_iallocator:
7775       self._CheckPrereq2()
7776
7777     if not self.disks:
7778       feedback_fn("No disks need replacement")
7779       return
7780
7781     feedback_fn("Replacing disk(s) %s for %s" %
7782                 (utils.CommaJoin(self.disks), self.instance.name))
7783
7784     activate_disks = (not self.instance.admin_up)
7785
7786     # Activate the instance disks if we're replacing them on a down instance
7787     if activate_disks:
7788       _StartInstanceDisks(self.lu, self.instance, True)
7789
7790     try:
7791       # Should we replace the secondary node?
7792       if self.new_node is not None:
7793         fn = self._ExecDrbd8Secondary
7794       else:
7795         fn = self._ExecDrbd8DiskOnly
7796
7797       return fn(feedback_fn)
7798
7799     finally:
7800       # Deactivate the instance disks if we're replacing them on a
7801       # down instance
7802       if activate_disks:
7803         _SafeShutdownInstanceDisks(self.lu, self.instance)
7804
7805   def _CheckVolumeGroup(self, nodes):
7806     self.lu.LogInfo("Checking volume groups")
7807
7808     vgname = self.cfg.GetVGName()
7809
7810     # Make sure volume group exists on all involved nodes
7811     results = self.rpc.call_vg_list(nodes)
7812     if not results:
7813       raise errors.OpExecError("Can't list volume groups on the nodes")
7814
7815     for node in nodes:
7816       res = results[node]
7817       res.Raise("Error checking node %s" % node)
7818       if vgname not in res.payload:
7819         raise errors.OpExecError("Volume group '%s' not found on node %s" %
7820                                  (vgname, node))
7821
7822   def _CheckDisksExistence(self, nodes):
7823     # Check disk existence
7824     for idx, dev in enumerate(self.instance.disks):
7825       if idx not in self.disks:
7826         continue
7827
7828       for node in nodes:
7829         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7830         self.cfg.SetDiskID(dev, node)
7831
7832         result = self.rpc.call_blockdev_find(node, dev)
7833
7834         msg = result.fail_msg
7835         if msg or not result.payload:
7836           if not msg:
7837             msg = "disk not found"
7838           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7839                                    (idx, node, msg))
7840
7841   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7842     for idx, dev in enumerate(self.instance.disks):
7843       if idx not in self.disks:
7844         continue
7845
7846       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7847                       (idx, node_name))
7848
7849       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7850                                    ldisk=ldisk):
7851         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7852                                  " replace disks for instance %s" %
7853                                  (node_name, self.instance.name))
7854
7855   def _CreateNewStorage(self, node_name):
7856     vgname = self.cfg.GetVGName()
7857     iv_names = {}
7858
7859     for idx, dev in enumerate(self.instance.disks):
7860       if idx not in self.disks:
7861         continue
7862
7863       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7864
7865       self.cfg.SetDiskID(dev, node_name)
7866
7867       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7868       names = _GenerateUniqueNames(self.lu, lv_names)
7869
7870       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7871                              logical_id=(vgname, names[0]))
7872       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7873                              logical_id=(vgname, names[1]))
7874
7875       new_lvs = [lv_data, lv_meta]
7876       old_lvs = dev.children
7877       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7878
7879       # we pass force_create=True to force the LVM creation
7880       for new_lv in new_lvs:
7881         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7882                         _GetInstanceInfoText(self.instance), False)
7883
7884     return iv_names
7885
7886   def _CheckDevices(self, node_name, iv_names):
7887     for name, (dev, _, _) in iv_names.iteritems():
7888       self.cfg.SetDiskID(dev, node_name)
7889
7890       result = self.rpc.call_blockdev_find(node_name, dev)
7891
7892       msg = result.fail_msg
7893       if msg or not result.payload:
7894         if not msg:
7895           msg = "disk not found"
7896         raise errors.OpExecError("Can't find DRBD device %s: %s" %
7897                                  (name, msg))
7898
7899       if result.payload.is_degraded:
7900         raise errors.OpExecError("DRBD device %s is degraded!" % name)
7901
7902   def _RemoveOldStorage(self, node_name, iv_names):
7903     for name, (_, old_lvs, _) in iv_names.iteritems():
7904       self.lu.LogInfo("Remove logical volumes for %s" % name)
7905
7906       for lv in old_lvs:
7907         self.cfg.SetDiskID(lv, node_name)
7908
7909         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7910         if msg:
7911           self.lu.LogWarning("Can't remove old LV: %s" % msg,
7912                              hint="remove unused LVs manually")
7913
7914   def _ReleaseNodeLock(self, node_name):
7915     """Releases the lock for a given node."""
7916     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7917
7918   def _ExecDrbd8DiskOnly(self, feedback_fn):
7919     """Replace a disk on the primary or secondary for DRBD 8.
7920
7921     The algorithm for replace is quite complicated:
7922
7923       1. for each disk to be replaced:
7924
7925         1. create new LVs on the target node with unique names
7926         1. detach old LVs from the drbd device
7927         1. rename old LVs to name_replaced.<time_t>
7928         1. rename new LVs to old LVs
7929         1. attach the new LVs (with the old names now) to the drbd device
7930
7931       1. wait for sync across all devices
7932
7933       1. for each modified disk:
7934
7935         1. remove old LVs (which have the name name_replaces.<time_t>)
7936
7937     Failures are not very well handled.
7938
7939     """
7940     steps_total = 6
7941
7942     # Step: check device activation
7943     self.lu.LogStep(1, steps_total, "Check device existence")
7944     self._CheckDisksExistence([self.other_node, self.target_node])
7945     self._CheckVolumeGroup([self.target_node, self.other_node])
7946
7947     # Step: check other node consistency
7948     self.lu.LogStep(2, steps_total, "Check peer consistency")
7949     self._CheckDisksConsistency(self.other_node,
7950                                 self.other_node == self.instance.primary_node,
7951                                 False)
7952
7953     # Step: create new storage
7954     self.lu.LogStep(3, steps_total, "Allocate new storage")
7955     iv_names = self._CreateNewStorage(self.target_node)
7956
7957     # Step: for each lv, detach+rename*2+attach
7958     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7959     for dev, old_lvs, new_lvs in iv_names.itervalues():
7960       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7961
7962       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7963                                                      old_lvs)
7964       result.Raise("Can't detach drbd from local storage on node"
7965                    " %s for device %s" % (self.target_node, dev.iv_name))
7966       #dev.children = []
7967       #cfg.Update(instance)
7968
7969       # ok, we created the new LVs, so now we know we have the needed
7970       # storage; as such, we proceed on the target node to rename
7971       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7972       # using the assumption that logical_id == physical_id (which in
7973       # turn is the unique_id on that node)
7974
7975       # FIXME(iustin): use a better name for the replaced LVs
7976       temp_suffix = int(time.time())
7977       ren_fn = lambda d, suff: (d.physical_id[0],
7978                                 d.physical_id[1] + "_replaced-%s" % suff)
7979
7980       # Build the rename list based on what LVs exist on the node
7981       rename_old_to_new = []
7982       for to_ren in old_lvs:
7983         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7984         if not result.fail_msg and result.payload:
7985           # device exists
7986           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7987
7988       self.lu.LogInfo("Renaming the old LVs on the target node")
7989       result = self.rpc.call_blockdev_rename(self.target_node,
7990                                              rename_old_to_new)
7991       result.Raise("Can't rename old LVs on node %s" % self.target_node)
7992
7993       # Now we rename the new LVs to the old LVs
7994       self.lu.LogInfo("Renaming the new LVs on the target node")
7995       rename_new_to_old = [(new, old.physical_id)
7996                            for old, new in zip(old_lvs, new_lvs)]
7997       result = self.rpc.call_blockdev_rename(self.target_node,
7998                                              rename_new_to_old)
7999       result.Raise("Can't rename new LVs on node %s" % self.target_node)
8000
8001       for old, new in zip(old_lvs, new_lvs):
8002         new.logical_id = old.logical_id
8003         self.cfg.SetDiskID(new, self.target_node)
8004
8005       for disk in old_lvs:
8006         disk.logical_id = ren_fn(disk, temp_suffix)
8007         self.cfg.SetDiskID(disk, self.target_node)
8008
8009       # Now that the new lvs have the old name, we can add them to the device
8010       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8011       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8012                                                   new_lvs)
8013       msg = result.fail_msg
8014       if msg:
8015         for new_lv in new_lvs:
8016           msg2 = self.rpc.call_blockdev_remove(self.target_node,
8017                                                new_lv).fail_msg
8018           if msg2:
8019             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8020                                hint=("cleanup manually the unused logical"
8021                                      "volumes"))
8022         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8023
8024       dev.children = new_lvs
8025
8026       self.cfg.Update(self.instance, feedback_fn)
8027
8028     cstep = 5
8029     if self.early_release:
8030       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8031       cstep += 1
8032       self._RemoveOldStorage(self.target_node, iv_names)
8033       # WARNING: we release both node locks here, do not do other RPCs
8034       # than WaitForSync to the primary node
8035       self._ReleaseNodeLock([self.target_node, self.other_node])
8036
8037     # Wait for sync
8038     # This can fail as the old devices are degraded and _WaitForSync
8039     # does a combined result over all disks, so we don't check its return value
8040     self.lu.LogStep(cstep, steps_total, "Sync devices")
8041     cstep += 1
8042     _WaitForSync(self.lu, self.instance)
8043
8044     # Check all devices manually
8045     self._CheckDevices(self.instance.primary_node, iv_names)
8046
8047     # Step: remove old storage
8048     if not self.early_release:
8049       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8050       cstep += 1
8051       self._RemoveOldStorage(self.target_node, iv_names)
8052
8053   def _ExecDrbd8Secondary(self, feedback_fn):
8054     """Replace the secondary node for DRBD 8.
8055
8056     The algorithm for replace is quite complicated:
8057       - for all disks of the instance:
8058         - create new LVs on the new node with same names
8059         - shutdown the drbd device on the old secondary
8060         - disconnect the drbd network on the primary
8061         - create the drbd device on the new secondary
8062         - network attach the drbd on the primary, using an artifice:
8063           the drbd code for Attach() will connect to the network if it
8064           finds a device which is connected to the good local disks but
8065           not network enabled
8066       - wait for sync across all devices
8067       - remove all disks from the old secondary
8068
8069     Failures are not very well handled.
8070
8071     """
8072     steps_total = 6
8073
8074     # Step: check device activation
8075     self.lu.LogStep(1, steps_total, "Check device existence")
8076     self._CheckDisksExistence([self.instance.primary_node])
8077     self._CheckVolumeGroup([self.instance.primary_node])
8078
8079     # Step: check other node consistency
8080     self.lu.LogStep(2, steps_total, "Check peer consistency")
8081     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8082
8083     # Step: create new storage
8084     self.lu.LogStep(3, steps_total, "Allocate new storage")
8085     for idx, dev in enumerate(self.instance.disks):
8086       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8087                       (self.new_node, idx))
8088       # we pass force_create=True to force LVM creation
8089       for new_lv in dev.children:
8090         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8091                         _GetInstanceInfoText(self.instance), False)
8092
8093     # Step 4: dbrd minors and drbd setups changes
8094     # after this, we must manually remove the drbd minors on both the
8095     # error and the success paths
8096     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8097     minors = self.cfg.AllocateDRBDMinor([self.new_node
8098                                          for dev in self.instance.disks],
8099                                         self.instance.name)
8100     logging.debug("Allocated minors %r", minors)
8101
8102     iv_names = {}
8103     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8104       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8105                       (self.new_node, idx))
8106       # create new devices on new_node; note that we create two IDs:
8107       # one without port, so the drbd will be activated without
8108       # networking information on the new node at this stage, and one
8109       # with network, for the latter activation in step 4
8110       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8111       if self.instance.primary_node == o_node1:
8112         p_minor = o_minor1
8113       else:
8114         assert self.instance.primary_node == o_node2, "Three-node instance?"
8115         p_minor = o_minor2
8116
8117       new_alone_id = (self.instance.primary_node, self.new_node, None,
8118                       p_minor, new_minor, o_secret)
8119       new_net_id = (self.instance.primary_node, self.new_node, o_port,
8120                     p_minor, new_minor, o_secret)
8121
8122       iv_names[idx] = (dev, dev.children, new_net_id)
8123       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8124                     new_net_id)
8125       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8126                               logical_id=new_alone_id,
8127                               children=dev.children,
8128                               size=dev.size)
8129       try:
8130         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8131                               _GetInstanceInfoText(self.instance), False)
8132       except errors.GenericError:
8133         self.cfg.ReleaseDRBDMinors(self.instance.name)
8134         raise
8135
8136     # We have new devices, shutdown the drbd on the old secondary
8137     for idx, dev in enumerate(self.instance.disks):
8138       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8139       self.cfg.SetDiskID(dev, self.target_node)
8140       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8141       if msg:
8142         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8143                            "node: %s" % (idx, msg),
8144                            hint=("Please cleanup this device manually as"
8145                                  " soon as possible"))
8146
8147     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8148     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8149                                                self.node_secondary_ip,
8150                                                self.instance.disks)\
8151                                               [self.instance.primary_node]
8152
8153     msg = result.fail_msg
8154     if msg:
8155       # detaches didn't succeed (unlikely)
8156       self.cfg.ReleaseDRBDMinors(self.instance.name)
8157       raise errors.OpExecError("Can't detach the disks from the network on"
8158                                " old node: %s" % (msg,))
8159
8160     # if we managed to detach at least one, we update all the disks of
8161     # the instance to point to the new secondary
8162     self.lu.LogInfo("Updating instance configuration")
8163     for dev, _, new_logical_id in iv_names.itervalues():
8164       dev.logical_id = new_logical_id
8165       self.cfg.SetDiskID(dev, self.instance.primary_node)
8166
8167     self.cfg.Update(self.instance, feedback_fn)
8168
8169     # and now perform the drbd attach
8170     self.lu.LogInfo("Attaching primary drbds to new secondary"
8171                     " (standalone => connected)")
8172     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8173                                             self.new_node],
8174                                            self.node_secondary_ip,
8175                                            self.instance.disks,
8176                                            self.instance.name,
8177                                            False)
8178     for to_node, to_result in result.items():
8179       msg = to_result.fail_msg
8180       if msg:
8181         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8182                            to_node, msg,
8183                            hint=("please do a gnt-instance info to see the"
8184                                  " status of disks"))
8185     cstep = 5
8186     if self.early_release:
8187       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8188       cstep += 1
8189       self._RemoveOldStorage(self.target_node, iv_names)
8190       # WARNING: we release all node locks here, do not do other RPCs
8191       # than WaitForSync to the primary node
8192       self._ReleaseNodeLock([self.instance.primary_node,
8193                              self.target_node,
8194                              self.new_node])
8195
8196     # Wait for sync
8197     # This can fail as the old devices are degraded and _WaitForSync
8198     # does a combined result over all disks, so we don't check its return value
8199     self.lu.LogStep(cstep, steps_total, "Sync devices")
8200     cstep += 1
8201     _WaitForSync(self.lu, self.instance)
8202
8203     # Check all devices manually
8204     self._CheckDevices(self.instance.primary_node, iv_names)
8205
8206     # Step: remove old storage
8207     if not self.early_release:
8208       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8209       self._RemoveOldStorage(self.target_node, iv_names)
8210
8211
8212 class LURepairNodeStorage(NoHooksLU):
8213   """Repairs the volume group on a node.
8214
8215   """
8216   _OP_PARAMS = [
8217     _PNodeName,
8218     ("storage_type", _NoDefault, _CheckStorageType),
8219     ("name", _NoDefault, _TNonEmptyString),
8220     ("ignore_consistency", False, _TBool),
8221     ]
8222   REQ_BGL = False
8223
8224   def CheckArguments(self):
8225     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8226
8227     storage_type = self.op.storage_type
8228
8229     if (constants.SO_FIX_CONSISTENCY not in
8230         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8231       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8232                                  " repaired" % storage_type,
8233                                  errors.ECODE_INVAL)
8234
8235   def ExpandNames(self):
8236     self.needed_locks = {
8237       locking.LEVEL_NODE: [self.op.node_name],
8238       }
8239
8240   def _CheckFaultyDisks(self, instance, node_name):
8241     """Ensure faulty disks abort the opcode or at least warn."""
8242     try:
8243       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8244                                   node_name, True):
8245         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8246                                    " node '%s'" % (instance.name, node_name),
8247                                    errors.ECODE_STATE)
8248     except errors.OpPrereqError, err:
8249       if self.op.ignore_consistency:
8250         self.proc.LogWarning(str(err.args[0]))
8251       else:
8252         raise
8253
8254   def CheckPrereq(self):
8255     """Check prerequisites.
8256
8257     """
8258     # Check whether any instance on this node has faulty disks
8259     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8260       if not inst.admin_up:
8261         continue
8262       check_nodes = set(inst.all_nodes)
8263       check_nodes.discard(self.op.node_name)
8264       for inst_node_name in check_nodes:
8265         self._CheckFaultyDisks(inst, inst_node_name)
8266
8267   def Exec(self, feedback_fn):
8268     feedback_fn("Repairing storage unit '%s' on %s ..." %
8269                 (self.op.name, self.op.node_name))
8270
8271     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8272     result = self.rpc.call_storage_execute(self.op.node_name,
8273                                            self.op.storage_type, st_args,
8274                                            self.op.name,
8275                                            constants.SO_FIX_CONSISTENCY)
8276     result.Raise("Failed to repair storage unit '%s' on %s" %
8277                  (self.op.name, self.op.node_name))
8278
8279
8280 class LUNodeEvacuationStrategy(NoHooksLU):
8281   """Computes the node evacuation strategy.
8282
8283   """
8284   _OP_PARAMS = [
8285     ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8286     ("remote_node", None, _TMaybeString),
8287     ("iallocator", None, _TMaybeString),
8288     ]
8289   REQ_BGL = False
8290
8291   def CheckArguments(self):
8292     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8293
8294   def ExpandNames(self):
8295     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8296     self.needed_locks = locks = {}
8297     if self.op.remote_node is None:
8298       locks[locking.LEVEL_NODE] = locking.ALL_SET
8299     else:
8300       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8301       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8302
8303   def Exec(self, feedback_fn):
8304     if self.op.remote_node is not None:
8305       instances = []
8306       for node in self.op.nodes:
8307         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8308       result = []
8309       for i in instances:
8310         if i.primary_node == self.op.remote_node:
8311           raise errors.OpPrereqError("Node %s is the primary node of"
8312                                      " instance %s, cannot use it as"
8313                                      " secondary" %
8314                                      (self.op.remote_node, i.name),
8315                                      errors.ECODE_INVAL)
8316         result.append([i.name, self.op.remote_node])
8317     else:
8318       ial = IAllocator(self.cfg, self.rpc,
8319                        mode=constants.IALLOCATOR_MODE_MEVAC,
8320                        evac_nodes=self.op.nodes)
8321       ial.Run(self.op.iallocator, validate=True)
8322       if not ial.success:
8323         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8324                                  errors.ECODE_NORES)
8325       result = ial.result
8326     return result
8327
8328
8329 class LUGrowDisk(LogicalUnit):
8330   """Grow a disk of an instance.
8331
8332   """
8333   HPATH = "disk-grow"
8334   HTYPE = constants.HTYPE_INSTANCE
8335   _OP_PARAMS = [
8336     _PInstanceName,
8337     ("disk", _NoDefault, _TInt),
8338     ("amount", _NoDefault, _TInt),
8339     ("wait_for_sync", True, _TBool),
8340     ]
8341   REQ_BGL = False
8342
8343   def ExpandNames(self):
8344     self._ExpandAndLockInstance()
8345     self.needed_locks[locking.LEVEL_NODE] = []
8346     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8347
8348   def DeclareLocks(self, level):
8349     if level == locking.LEVEL_NODE:
8350       self._LockInstancesNodes()
8351
8352   def BuildHooksEnv(self):
8353     """Build hooks env.
8354
8355     This runs on the master, the primary and all the secondaries.
8356
8357     """
8358     env = {
8359       "DISK": self.op.disk,
8360       "AMOUNT": self.op.amount,
8361       }
8362     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8363     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8364     return env, nl, nl
8365
8366   def CheckPrereq(self):
8367     """Check prerequisites.
8368
8369     This checks that the instance is in the cluster.
8370
8371     """
8372     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8373     assert instance is not None, \
8374       "Cannot retrieve locked instance %s" % self.op.instance_name
8375     nodenames = list(instance.all_nodes)
8376     for node in nodenames:
8377       _CheckNodeOnline(self, node)
8378
8379     self.instance = instance
8380
8381     if instance.disk_template not in constants.DTS_GROWABLE:
8382       raise errors.OpPrereqError("Instance's disk layout does not support"
8383                                  " growing.", errors.ECODE_INVAL)
8384
8385     self.disk = instance.FindDisk(self.op.disk)
8386
8387     if instance.disk_template != constants.DT_FILE:
8388       # TODO: check the free disk space for file, when that feature will be
8389       # supported
8390       _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8391
8392   def Exec(self, feedback_fn):
8393     """Execute disk grow.
8394
8395     """
8396     instance = self.instance
8397     disk = self.disk
8398
8399     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8400     if not disks_ok:
8401       raise errors.OpExecError("Cannot activate block device to grow")
8402
8403     for node in instance.all_nodes:
8404       self.cfg.SetDiskID(disk, node)
8405       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8406       result.Raise("Grow request failed to node %s" % node)
8407
8408       # TODO: Rewrite code to work properly
8409       # DRBD goes into sync mode for a short amount of time after executing the
8410       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8411       # calling "resize" in sync mode fails. Sleeping for a short amount of
8412       # time is a work-around.
8413       time.sleep(5)
8414
8415     disk.RecordGrow(self.op.amount)
8416     self.cfg.Update(instance, feedback_fn)
8417     if self.op.wait_for_sync:
8418       disk_abort = not _WaitForSync(self, instance, disks=[disk])
8419       if disk_abort:
8420         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8421                              " status.\nPlease check the instance.")
8422       if not instance.admin_up:
8423         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8424     elif not instance.admin_up:
8425       self.proc.LogWarning("Not shutting down the disk even if the instance is"
8426                            " not supposed to be running because no wait for"
8427                            " sync mode was requested.")
8428
8429
8430 class LUQueryInstanceData(NoHooksLU):
8431   """Query runtime instance data.
8432
8433   """
8434   _OP_PARAMS = [
8435     ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8436     ("static", False, _TBool),
8437     ]
8438   REQ_BGL = False
8439
8440   def ExpandNames(self):
8441     self.needed_locks = {}
8442     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8443
8444     if self.op.instances:
8445       self.wanted_names = []
8446       for name in self.op.instances:
8447         full_name = _ExpandInstanceName(self.cfg, name)
8448         self.wanted_names.append(full_name)
8449       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8450     else:
8451       self.wanted_names = None
8452       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8453
8454     self.needed_locks[locking.LEVEL_NODE] = []
8455     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8456
8457   def DeclareLocks(self, level):
8458     if level == locking.LEVEL_NODE:
8459       self._LockInstancesNodes()
8460
8461   def CheckPrereq(self):
8462     """Check prerequisites.
8463
8464     This only checks the optional instance list against the existing names.
8465
8466     """
8467     if self.wanted_names is None:
8468       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8469
8470     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8471                              in self.wanted_names]
8472
8473   def _ComputeBlockdevStatus(self, node, instance_name, dev):
8474     """Returns the status of a block device
8475
8476     """
8477     if self.op.static or not node:
8478       return None
8479
8480     self.cfg.SetDiskID(dev, node)
8481
8482     result = self.rpc.call_blockdev_find(node, dev)
8483     if result.offline:
8484       return None
8485
8486     result.Raise("Can't compute disk status for %s" % instance_name)
8487
8488     status = result.payload
8489     if status is None:
8490       return None
8491
8492     return (status.dev_path, status.major, status.minor,
8493             status.sync_percent, status.estimated_time,
8494             status.is_degraded, status.ldisk_status)
8495
8496   def _ComputeDiskStatus(self, instance, snode, dev):
8497     """Compute block device status.
8498
8499     """
8500     if dev.dev_type in constants.LDS_DRBD:
8501       # we change the snode then (otherwise we use the one passed in)
8502       if dev.logical_id[0] == instance.primary_node:
8503         snode = dev.logical_id[1]
8504       else:
8505         snode = dev.logical_id[0]
8506
8507     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8508                                               instance.name, dev)
8509     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8510
8511     if dev.children:
8512       dev_children = [self._ComputeDiskStatus(instance, snode, child)
8513                       for child in dev.children]
8514     else:
8515       dev_children = []
8516
8517     data = {
8518       "iv_name": dev.iv_name,
8519       "dev_type": dev.dev_type,
8520       "logical_id": dev.logical_id,
8521       "physical_id": dev.physical_id,
8522       "pstatus": dev_pstatus,
8523       "sstatus": dev_sstatus,
8524       "children": dev_children,
8525       "mode": dev.mode,
8526       "size": dev.size,
8527       }
8528
8529     return data
8530
8531   def Exec(self, feedback_fn):
8532     """Gather and return data"""
8533     result = {}
8534
8535     cluster = self.cfg.GetClusterInfo()
8536
8537     for instance in self.wanted_instances:
8538       if not self.op.static:
8539         remote_info = self.rpc.call_instance_info(instance.primary_node,
8540                                                   instance.name,
8541                                                   instance.hypervisor)
8542         remote_info.Raise("Error checking node %s" % instance.primary_node)
8543         remote_info = remote_info.payload
8544         if remote_info and "state" in remote_info:
8545           remote_state = "up"
8546         else:
8547           remote_state = "down"
8548       else:
8549         remote_state = None
8550       if instance.admin_up:
8551         config_state = "up"
8552       else:
8553         config_state = "down"
8554
8555       disks = [self._ComputeDiskStatus(instance, None, device)
8556                for device in instance.disks]
8557
8558       idict = {
8559         "name": instance.name,
8560         "config_state": config_state,
8561         "run_state": remote_state,
8562         "pnode": instance.primary_node,
8563         "snodes": instance.secondary_nodes,
8564         "os": instance.os,
8565         # this happens to be the same format used for hooks
8566         "nics": _NICListToTuple(self, instance.nics),
8567         "disk_template": instance.disk_template,
8568         "disks": disks,
8569         "hypervisor": instance.hypervisor,
8570         "network_port": instance.network_port,
8571         "hv_instance": instance.hvparams,
8572         "hv_actual": cluster.FillHV(instance, skip_globals=True),
8573         "be_instance": instance.beparams,
8574         "be_actual": cluster.FillBE(instance),
8575         "os_instance": instance.osparams,
8576         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8577         "serial_no": instance.serial_no,
8578         "mtime": instance.mtime,
8579         "ctime": instance.ctime,
8580         "uuid": instance.uuid,
8581         }
8582
8583       result[instance.name] = idict
8584
8585     return result
8586
8587
8588 class LUSetInstanceParams(LogicalUnit):
8589   """Modifies an instances's parameters.
8590
8591   """
8592   HPATH = "instance-modify"
8593   HTYPE = constants.HTYPE_INSTANCE
8594   _OP_PARAMS = [
8595     _PInstanceName,
8596     ("nics", _EmptyList, _TList),
8597     ("disks", _EmptyList, _TList),
8598     ("beparams", _EmptyDict, _TDict),
8599     ("hvparams", _EmptyDict, _TDict),
8600     ("disk_template", None, _TMaybeString),
8601     ("remote_node", None, _TMaybeString),
8602     ("os_name", None, _TMaybeString),
8603     ("force_variant", False, _TBool),
8604     ("osparams", None, _TOr(_TDict, _TNone)),
8605     _PForce,
8606     ]
8607   REQ_BGL = False
8608
8609   def CheckArguments(self):
8610     if not (self.op.nics or self.op.disks or self.op.disk_template or
8611             self.op.hvparams or self.op.beparams or self.op.os_name):
8612       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8613
8614     if self.op.hvparams:
8615       _CheckGlobalHvParams(self.op.hvparams)
8616
8617     # Disk validation
8618     disk_addremove = 0
8619     for disk_op, disk_dict in self.op.disks:
8620       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8621       if disk_op == constants.DDM_REMOVE:
8622         disk_addremove += 1
8623         continue
8624       elif disk_op == constants.DDM_ADD:
8625         disk_addremove += 1
8626       else:
8627         if not isinstance(disk_op, int):
8628           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8629         if not isinstance(disk_dict, dict):
8630           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8631           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8632
8633       if disk_op == constants.DDM_ADD:
8634         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8635         if mode not in constants.DISK_ACCESS_SET:
8636           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8637                                      errors.ECODE_INVAL)
8638         size = disk_dict.get('size', None)
8639         if size is None:
8640           raise errors.OpPrereqError("Required disk parameter size missing",
8641                                      errors.ECODE_INVAL)
8642         try:
8643           size = int(size)
8644         except (TypeError, ValueError), err:
8645           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8646                                      str(err), errors.ECODE_INVAL)
8647         disk_dict['size'] = size
8648       else:
8649         # modification of disk
8650         if 'size' in disk_dict:
8651           raise errors.OpPrereqError("Disk size change not possible, use"
8652                                      " grow-disk", errors.ECODE_INVAL)
8653
8654     if disk_addremove > 1:
8655       raise errors.OpPrereqError("Only one disk add or remove operation"
8656                                  " supported at a time", errors.ECODE_INVAL)
8657
8658     if self.op.disks and self.op.disk_template is not None:
8659       raise errors.OpPrereqError("Disk template conversion and other disk"
8660                                  " changes not supported at the same time",
8661                                  errors.ECODE_INVAL)
8662
8663     if self.op.disk_template:
8664       _CheckDiskTemplate(self.op.disk_template)
8665       if (self.op.disk_template in constants.DTS_NET_MIRROR and
8666           self.op.remote_node is None):
8667         raise errors.OpPrereqError("Changing the disk template to a mirrored"
8668                                    " one requires specifying a secondary node",
8669                                    errors.ECODE_INVAL)
8670
8671     # NIC validation
8672     nic_addremove = 0
8673     for nic_op, nic_dict in self.op.nics:
8674       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8675       if nic_op == constants.DDM_REMOVE:
8676         nic_addremove += 1
8677         continue
8678       elif nic_op == constants.DDM_ADD:
8679         nic_addremove += 1
8680       else:
8681         if not isinstance(nic_op, int):
8682           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8683         if not isinstance(nic_dict, dict):
8684           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8685           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8686
8687       # nic_dict should be a dict
8688       nic_ip = nic_dict.get('ip', None)
8689       if nic_ip is not None:
8690         if nic_ip.lower() == constants.VALUE_NONE:
8691           nic_dict['ip'] = None
8692         else:
8693           if not netutils.IP4Address.IsValid(nic_ip):
8694             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8695                                        errors.ECODE_INVAL)
8696
8697       nic_bridge = nic_dict.get('bridge', None)
8698       nic_link = nic_dict.get('link', None)
8699       if nic_bridge and nic_link:
8700         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8701                                    " at the same time", errors.ECODE_INVAL)
8702       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8703         nic_dict['bridge'] = None
8704       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8705         nic_dict['link'] = None
8706
8707       if nic_op == constants.DDM_ADD:
8708         nic_mac = nic_dict.get('mac', None)
8709         if nic_mac is None:
8710           nic_dict['mac'] = constants.VALUE_AUTO
8711
8712       if 'mac' in nic_dict:
8713         nic_mac = nic_dict['mac']
8714         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8715           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8716
8717         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8718           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8719                                      " modifying an existing nic",
8720                                      errors.ECODE_INVAL)
8721
8722     if nic_addremove > 1:
8723       raise errors.OpPrereqError("Only one NIC add or remove operation"
8724                                  " supported at a time", errors.ECODE_INVAL)
8725
8726   def ExpandNames(self):
8727     self._ExpandAndLockInstance()
8728     self.needed_locks[locking.LEVEL_NODE] = []
8729     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8730
8731   def DeclareLocks(self, level):
8732     if level == locking.LEVEL_NODE:
8733       self._LockInstancesNodes()
8734       if self.op.disk_template and self.op.remote_node:
8735         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8736         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8737
8738   def BuildHooksEnv(self):
8739     """Build hooks env.
8740
8741     This runs on the master, primary and secondaries.
8742
8743     """
8744     args = dict()
8745     if constants.BE_MEMORY in self.be_new:
8746       args['memory'] = self.be_new[constants.BE_MEMORY]
8747     if constants.BE_VCPUS in self.be_new:
8748       args['vcpus'] = self.be_new[constants.BE_VCPUS]
8749     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8750     # information at all.
8751     if self.op.nics:
8752       args['nics'] = []
8753       nic_override = dict(self.op.nics)
8754       for idx, nic in enumerate(self.instance.nics):
8755         if idx in nic_override:
8756           this_nic_override = nic_override[idx]
8757         else:
8758           this_nic_override = {}
8759         if 'ip' in this_nic_override:
8760           ip = this_nic_override['ip']
8761         else:
8762           ip = nic.ip
8763         if 'mac' in this_nic_override:
8764           mac = this_nic_override['mac']
8765         else:
8766           mac = nic.mac
8767         if idx in self.nic_pnew:
8768           nicparams = self.nic_pnew[idx]
8769         else:
8770           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8771         mode = nicparams[constants.NIC_MODE]
8772         link = nicparams[constants.NIC_LINK]
8773         args['nics'].append((ip, mac, mode, link))
8774       if constants.DDM_ADD in nic_override:
8775         ip = nic_override[constants.DDM_ADD].get('ip', None)
8776         mac = nic_override[constants.DDM_ADD]['mac']
8777         nicparams = self.nic_pnew[constants.DDM_ADD]
8778         mode = nicparams[constants.NIC_MODE]
8779         link = nicparams[constants.NIC_LINK]
8780         args['nics'].append((ip, mac, mode, link))
8781       elif constants.DDM_REMOVE in nic_override:
8782         del args['nics'][-1]
8783
8784     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8785     if self.op.disk_template:
8786       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8787     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8788     return env, nl, nl
8789
8790   def CheckPrereq(self):
8791     """Check prerequisites.
8792
8793     This only checks the instance list against the existing names.
8794
8795     """
8796     # checking the new params on the primary/secondary nodes
8797
8798     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8799     cluster = self.cluster = self.cfg.GetClusterInfo()
8800     assert self.instance is not None, \
8801       "Cannot retrieve locked instance %s" % self.op.instance_name
8802     pnode = instance.primary_node
8803     nodelist = list(instance.all_nodes)
8804
8805     # OS change
8806     if self.op.os_name and not self.op.force:
8807       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8808                       self.op.force_variant)
8809       instance_os = self.op.os_name
8810     else:
8811       instance_os = instance.os
8812
8813     if self.op.disk_template:
8814       if instance.disk_template == self.op.disk_template:
8815         raise errors.OpPrereqError("Instance already has disk template %s" %
8816                                    instance.disk_template, errors.ECODE_INVAL)
8817
8818       if (instance.disk_template,
8819           self.op.disk_template) not in self._DISK_CONVERSIONS:
8820         raise errors.OpPrereqError("Unsupported disk template conversion from"
8821                                    " %s to %s" % (instance.disk_template,
8822                                                   self.op.disk_template),
8823                                    errors.ECODE_INVAL)
8824       _CheckInstanceDown(self, instance, "cannot change disk template")
8825       if self.op.disk_template in constants.DTS_NET_MIRROR:
8826         if self.op.remote_node == pnode:
8827           raise errors.OpPrereqError("Given new secondary node %s is the same"
8828                                      " as the primary node of the instance" %
8829                                      self.op.remote_node, errors.ECODE_STATE)
8830         _CheckNodeOnline(self, self.op.remote_node)
8831         _CheckNodeNotDrained(self, self.op.remote_node)
8832         disks = [{"size": d.size} for d in instance.disks]
8833         required = _ComputeDiskSize(self.op.disk_template, disks)
8834         _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8835
8836     # hvparams processing
8837     if self.op.hvparams:
8838       hv_type = instance.hypervisor
8839       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8840       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8841       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8842
8843       # local check
8844       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8845       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8846       self.hv_new = hv_new # the new actual values
8847       self.hv_inst = i_hvdict # the new dict (without defaults)
8848     else:
8849       self.hv_new = self.hv_inst = {}
8850
8851     # beparams processing
8852     if self.op.beparams:
8853       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8854                                    use_none=True)
8855       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8856       be_new = cluster.SimpleFillBE(i_bedict)
8857       self.be_new = be_new # the new actual values
8858       self.be_inst = i_bedict # the new dict (without defaults)
8859     else:
8860       self.be_new = self.be_inst = {}
8861
8862     # osparams processing
8863     if self.op.osparams:
8864       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8865       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8866       self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8867       self.os_inst = i_osdict # the new dict (without defaults)
8868     else:
8869       self.os_new = self.os_inst = {}
8870
8871     self.warn = []
8872
8873     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8874       mem_check_list = [pnode]
8875       if be_new[constants.BE_AUTO_BALANCE]:
8876         # either we changed auto_balance to yes or it was from before
8877         mem_check_list.extend(instance.secondary_nodes)
8878       instance_info = self.rpc.call_instance_info(pnode, instance.name,
8879                                                   instance.hypervisor)
8880       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8881                                          instance.hypervisor)
8882       pninfo = nodeinfo[pnode]
8883       msg = pninfo.fail_msg
8884       if msg:
8885         # Assume the primary node is unreachable and go ahead
8886         self.warn.append("Can't get info from primary node %s: %s" %
8887                          (pnode,  msg))
8888       elif not isinstance(pninfo.payload.get('memory_free', None), int):
8889         self.warn.append("Node data from primary node %s doesn't contain"
8890                          " free memory information" % pnode)
8891       elif instance_info.fail_msg:
8892         self.warn.append("Can't get instance runtime information: %s" %
8893                         instance_info.fail_msg)
8894       else:
8895         if instance_info.payload:
8896           current_mem = int(instance_info.payload['memory'])
8897         else:
8898           # Assume instance not running
8899           # (there is a slight race condition here, but it's not very probable,
8900           # and we have no other way to check)
8901           current_mem = 0
8902         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8903                     pninfo.payload['memory_free'])
8904         if miss_mem > 0:
8905           raise errors.OpPrereqError("This change will prevent the instance"
8906                                      " from starting, due to %d MB of memory"
8907                                      " missing on its primary node" % miss_mem,
8908                                      errors.ECODE_NORES)
8909
8910       if be_new[constants.BE_AUTO_BALANCE]:
8911         for node, nres in nodeinfo.items():
8912           if node not in instance.secondary_nodes:
8913             continue
8914           msg = nres.fail_msg
8915           if msg:
8916             self.warn.append("Can't get info from secondary node %s: %s" %
8917                              (node, msg))
8918           elif not isinstance(nres.payload.get('memory_free', None), int):
8919             self.warn.append("Secondary node %s didn't return free"
8920                              " memory information" % node)
8921           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8922             self.warn.append("Not enough memory to failover instance to"
8923                              " secondary node %s" % node)
8924
8925     # NIC processing
8926     self.nic_pnew = {}
8927     self.nic_pinst = {}
8928     for nic_op, nic_dict in self.op.nics:
8929       if nic_op == constants.DDM_REMOVE:
8930         if not instance.nics:
8931           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8932                                      errors.ECODE_INVAL)
8933         continue
8934       if nic_op != constants.DDM_ADD:
8935         # an existing nic
8936         if not instance.nics:
8937           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8938                                      " no NICs" % nic_op,
8939                                      errors.ECODE_INVAL)
8940         if nic_op < 0 or nic_op >= len(instance.nics):
8941           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8942                                      " are 0 to %d" %
8943                                      (nic_op, len(instance.nics) - 1),
8944                                      errors.ECODE_INVAL)
8945         old_nic_params = instance.nics[nic_op].nicparams
8946         old_nic_ip = instance.nics[nic_op].ip
8947       else:
8948         old_nic_params = {}
8949         old_nic_ip = None
8950
8951       update_params_dict = dict([(key, nic_dict[key])
8952                                  for key in constants.NICS_PARAMETERS
8953                                  if key in nic_dict])
8954
8955       if 'bridge' in nic_dict:
8956         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8957
8958       new_nic_params = _GetUpdatedParams(old_nic_params,
8959                                          update_params_dict)
8960       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8961       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8962       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8963       self.nic_pinst[nic_op] = new_nic_params
8964       self.nic_pnew[nic_op] = new_filled_nic_params
8965       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8966
8967       if new_nic_mode == constants.NIC_MODE_BRIDGED:
8968         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8969         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8970         if msg:
8971           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8972           if self.op.force:
8973             self.warn.append(msg)
8974           else:
8975             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8976       if new_nic_mode == constants.NIC_MODE_ROUTED:
8977         if 'ip' in nic_dict:
8978           nic_ip = nic_dict['ip']
8979         else:
8980           nic_ip = old_nic_ip
8981         if nic_ip is None:
8982           raise errors.OpPrereqError('Cannot set the nic ip to None'
8983                                      ' on a routed nic', errors.ECODE_INVAL)
8984       if 'mac' in nic_dict:
8985         nic_mac = nic_dict['mac']
8986         if nic_mac is None:
8987           raise errors.OpPrereqError('Cannot set the nic mac to None',
8988                                      errors.ECODE_INVAL)
8989         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8990           # otherwise generate the mac
8991           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8992         else:
8993           # or validate/reserve the current one
8994           try:
8995             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8996           except errors.ReservationError:
8997             raise errors.OpPrereqError("MAC address %s already in use"
8998                                        " in cluster" % nic_mac,
8999                                        errors.ECODE_NOTUNIQUE)
9000
9001     # DISK processing
9002     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9003       raise errors.OpPrereqError("Disk operations not supported for"
9004                                  " diskless instances",
9005                                  errors.ECODE_INVAL)
9006     for disk_op, _ in self.op.disks:
9007       if disk_op == constants.DDM_REMOVE:
9008         if len(instance.disks) == 1:
9009           raise errors.OpPrereqError("Cannot remove the last disk of"
9010                                      " an instance", errors.ECODE_INVAL)
9011         _CheckInstanceDown(self, instance, "cannot remove disks")
9012
9013       if (disk_op == constants.DDM_ADD and
9014           len(instance.nics) >= constants.MAX_DISKS):
9015         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9016                                    " add more" % constants.MAX_DISKS,
9017                                    errors.ECODE_STATE)
9018       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9019         # an existing disk
9020         if disk_op < 0 or disk_op >= len(instance.disks):
9021           raise errors.OpPrereqError("Invalid disk index %s, valid values"
9022                                      " are 0 to %d" %
9023                                      (disk_op, len(instance.disks)),
9024                                      errors.ECODE_INVAL)
9025
9026     return
9027
9028   def _ConvertPlainToDrbd(self, feedback_fn):
9029     """Converts an instance from plain to drbd.
9030
9031     """
9032     feedback_fn("Converting template to drbd")
9033     instance = self.instance
9034     pnode = instance.primary_node
9035     snode = self.op.remote_node
9036
9037     # create a fake disk info for _GenerateDiskTemplate
9038     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9039     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9040                                       instance.name, pnode, [snode],
9041                                       disk_info, None, None, 0)
9042     info = _GetInstanceInfoText(instance)
9043     feedback_fn("Creating aditional volumes...")
9044     # first, create the missing data and meta devices
9045     for disk in new_disks:
9046       # unfortunately this is... not too nice
9047       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9048                             info, True)
9049       for child in disk.children:
9050         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9051     # at this stage, all new LVs have been created, we can rename the
9052     # old ones
9053     feedback_fn("Renaming original volumes...")
9054     rename_list = [(o, n.children[0].logical_id)
9055                    for (o, n) in zip(instance.disks, new_disks)]
9056     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9057     result.Raise("Failed to rename original LVs")
9058
9059     feedback_fn("Initializing DRBD devices...")
9060     # all child devices are in place, we can now create the DRBD devices
9061     for disk in new_disks:
9062       for node in [pnode, snode]:
9063         f_create = node == pnode
9064         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9065
9066     # at this point, the instance has been modified
9067     instance.disk_template = constants.DT_DRBD8
9068     instance.disks = new_disks
9069     self.cfg.Update(instance, feedback_fn)
9070
9071     # disks are created, waiting for sync
9072     disk_abort = not _WaitForSync(self, instance)
9073     if disk_abort:
9074       raise errors.OpExecError("There are some degraded disks for"
9075                                " this instance, please cleanup manually")
9076
9077   def _ConvertDrbdToPlain(self, feedback_fn):
9078     """Converts an instance from drbd to plain.
9079
9080     """
9081     instance = self.instance
9082     assert len(instance.secondary_nodes) == 1
9083     pnode = instance.primary_node
9084     snode = instance.secondary_nodes[0]
9085     feedback_fn("Converting template to plain")
9086
9087     old_disks = instance.disks
9088     new_disks = [d.children[0] for d in old_disks]
9089
9090     # copy over size and mode
9091     for parent, child in zip(old_disks, new_disks):
9092       child.size = parent.size
9093       child.mode = parent.mode
9094
9095     # update instance structure
9096     instance.disks = new_disks
9097     instance.disk_template = constants.DT_PLAIN
9098     self.cfg.Update(instance, feedback_fn)
9099
9100     feedback_fn("Removing volumes on the secondary node...")
9101     for disk in old_disks:
9102       self.cfg.SetDiskID(disk, snode)
9103       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9104       if msg:
9105         self.LogWarning("Could not remove block device %s on node %s,"
9106                         " continuing anyway: %s", disk.iv_name, snode, msg)
9107
9108     feedback_fn("Removing unneeded volumes on the primary node...")
9109     for idx, disk in enumerate(old_disks):
9110       meta = disk.children[1]
9111       self.cfg.SetDiskID(meta, pnode)
9112       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9113       if msg:
9114         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9115                         " continuing anyway: %s", idx, pnode, msg)
9116
9117
9118   def Exec(self, feedback_fn):
9119     """Modifies an instance.
9120
9121     All parameters take effect only at the next restart of the instance.
9122
9123     """
9124     # Process here the warnings from CheckPrereq, as we don't have a
9125     # feedback_fn there.
9126     for warn in self.warn:
9127       feedback_fn("WARNING: %s" % warn)
9128
9129     result = []
9130     instance = self.instance
9131     # disk changes
9132     for disk_op, disk_dict in self.op.disks:
9133       if disk_op == constants.DDM_REMOVE:
9134         # remove the last disk
9135         device = instance.disks.pop()
9136         device_idx = len(instance.disks)
9137         for node, disk in device.ComputeNodeTree(instance.primary_node):
9138           self.cfg.SetDiskID(disk, node)
9139           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9140           if msg:
9141             self.LogWarning("Could not remove disk/%d on node %s: %s,"
9142                             " continuing anyway", device_idx, node, msg)
9143         result.append(("disk/%d" % device_idx, "remove"))
9144       elif disk_op == constants.DDM_ADD:
9145         # add a new disk
9146         if instance.disk_template == constants.DT_FILE:
9147           file_driver, file_path = instance.disks[0].logical_id
9148           file_path = os.path.dirname(file_path)
9149         else:
9150           file_driver = file_path = None
9151         disk_idx_base = len(instance.disks)
9152         new_disk = _GenerateDiskTemplate(self,
9153                                          instance.disk_template,
9154                                          instance.name, instance.primary_node,
9155                                          instance.secondary_nodes,
9156                                          [disk_dict],
9157                                          file_path,
9158                                          file_driver,
9159                                          disk_idx_base)[0]
9160         instance.disks.append(new_disk)
9161         info = _GetInstanceInfoText(instance)
9162
9163         logging.info("Creating volume %s for instance %s",
9164                      new_disk.iv_name, instance.name)
9165         # Note: this needs to be kept in sync with _CreateDisks
9166         #HARDCODE
9167         for node in instance.all_nodes:
9168           f_create = node == instance.primary_node
9169           try:
9170             _CreateBlockDev(self, node, instance, new_disk,
9171                             f_create, info, f_create)
9172           except errors.OpExecError, err:
9173             self.LogWarning("Failed to create volume %s (%s) on"
9174                             " node %s: %s",
9175                             new_disk.iv_name, new_disk, node, err)
9176         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9177                        (new_disk.size, new_disk.mode)))
9178       else:
9179         # change a given disk
9180         instance.disks[disk_op].mode = disk_dict['mode']
9181         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9182
9183     if self.op.disk_template:
9184       r_shut = _ShutdownInstanceDisks(self, instance)
9185       if not r_shut:
9186         raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9187                                  " proceed with disk template conversion")
9188       mode = (instance.disk_template, self.op.disk_template)
9189       try:
9190         self._DISK_CONVERSIONS[mode](self, feedback_fn)
9191       except:
9192         self.cfg.ReleaseDRBDMinors(instance.name)
9193         raise
9194       result.append(("disk_template", self.op.disk_template))
9195
9196     # NIC changes
9197     for nic_op, nic_dict in self.op.nics:
9198       if nic_op == constants.DDM_REMOVE:
9199         # remove the last nic
9200         del instance.nics[-1]
9201         result.append(("nic.%d" % len(instance.nics), "remove"))
9202       elif nic_op == constants.DDM_ADD:
9203         # mac and bridge should be set, by now
9204         mac = nic_dict['mac']
9205         ip = nic_dict.get('ip', None)
9206         nicparams = self.nic_pinst[constants.DDM_ADD]
9207         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9208         instance.nics.append(new_nic)
9209         result.append(("nic.%d" % (len(instance.nics) - 1),
9210                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
9211                        (new_nic.mac, new_nic.ip,
9212                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9213                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9214                        )))
9215       else:
9216         for key in 'mac', 'ip':
9217           if key in nic_dict:
9218             setattr(instance.nics[nic_op], key, nic_dict[key])
9219         if nic_op in self.nic_pinst:
9220           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9221         for key, val in nic_dict.iteritems():
9222           result.append(("nic.%s/%d" % (key, nic_op), val))
9223
9224     # hvparams changes
9225     if self.op.hvparams:
9226       instance.hvparams = self.hv_inst
9227       for key, val in self.op.hvparams.iteritems():
9228         result.append(("hv/%s" % key, val))
9229
9230     # beparams changes
9231     if self.op.beparams:
9232       instance.beparams = self.be_inst
9233       for key, val in self.op.beparams.iteritems():
9234         result.append(("be/%s" % key, val))
9235
9236     # OS change
9237     if self.op.os_name:
9238       instance.os = self.op.os_name
9239
9240     # osparams changes
9241     if self.op.osparams:
9242       instance.osparams = self.os_inst
9243       for key, val in self.op.osparams.iteritems():
9244         result.append(("os/%s" % key, val))
9245
9246     self.cfg.Update(instance, feedback_fn)
9247
9248     return result
9249
9250   _DISK_CONVERSIONS = {
9251     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9252     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9253     }
9254
9255
9256 class LUQueryExports(NoHooksLU):
9257   """Query the exports list
9258
9259   """
9260   _OP_PARAMS = [
9261     ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9262     ("use_locking", False, _TBool),
9263     ]
9264   REQ_BGL = False
9265
9266   def ExpandNames(self):
9267     self.needed_locks = {}
9268     self.share_locks[locking.LEVEL_NODE] = 1
9269     if not self.op.nodes:
9270       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9271     else:
9272       self.needed_locks[locking.LEVEL_NODE] = \
9273         _GetWantedNodes(self, self.op.nodes)
9274
9275   def Exec(self, feedback_fn):
9276     """Compute the list of all the exported system images.
9277
9278     @rtype: dict
9279     @return: a dictionary with the structure node->(export-list)
9280         where export-list is a list of the instances exported on
9281         that node.
9282
9283     """
9284     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9285     rpcresult = self.rpc.call_export_list(self.nodes)
9286     result = {}
9287     for node in rpcresult:
9288       if rpcresult[node].fail_msg:
9289         result[node] = False
9290       else:
9291         result[node] = rpcresult[node].payload
9292
9293     return result
9294
9295
9296 class LUPrepareExport(NoHooksLU):
9297   """Prepares an instance for an export and returns useful information.
9298
9299   """
9300   _OP_PARAMS = [
9301     _PInstanceName,
9302     ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9303     ]
9304   REQ_BGL = False
9305
9306   def ExpandNames(self):
9307     self._ExpandAndLockInstance()
9308
9309   def CheckPrereq(self):
9310     """Check prerequisites.
9311
9312     """
9313     instance_name = self.op.instance_name
9314
9315     self.instance = self.cfg.GetInstanceInfo(instance_name)
9316     assert self.instance is not None, \
9317           "Cannot retrieve locked instance %s" % self.op.instance_name
9318     _CheckNodeOnline(self, self.instance.primary_node)
9319
9320     self._cds = _GetClusterDomainSecret()
9321
9322   def Exec(self, feedback_fn):
9323     """Prepares an instance for an export.
9324
9325     """
9326     instance = self.instance
9327
9328     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9329       salt = utils.GenerateSecret(8)
9330
9331       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9332       result = self.rpc.call_x509_cert_create(instance.primary_node,
9333                                               constants.RIE_CERT_VALIDITY)
9334       result.Raise("Can't create X509 key and certificate on %s" % result.node)
9335
9336       (name, cert_pem) = result.payload
9337
9338       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9339                                              cert_pem)
9340
9341       return {
9342         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9343         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9344                           salt),
9345         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9346         }
9347
9348     return None
9349
9350
9351 class LUExportInstance(LogicalUnit):
9352   """Export an instance to an image in the cluster.
9353
9354   """
9355   HPATH = "instance-export"
9356   HTYPE = constants.HTYPE_INSTANCE
9357   _OP_PARAMS = [
9358     _PInstanceName,
9359     ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9360     ("shutdown", True, _TBool),
9361     _PShutdownTimeout,
9362     ("remove_instance", False, _TBool),
9363     ("ignore_remove_failures", False, _TBool),
9364     ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9365     ("x509_key_name", None, _TOr(_TList, _TNone)),
9366     ("destination_x509_ca", None, _TMaybeString),
9367     ]
9368   REQ_BGL = False
9369
9370   def CheckArguments(self):
9371     """Check the arguments.
9372
9373     """
9374     self.x509_key_name = self.op.x509_key_name
9375     self.dest_x509_ca_pem = self.op.destination_x509_ca
9376
9377     if self.op.remove_instance and not self.op.shutdown:
9378       raise errors.OpPrereqError("Can not remove instance without shutting it"
9379                                  " down before")
9380
9381     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9382       if not self.x509_key_name:
9383         raise errors.OpPrereqError("Missing X509 key name for encryption",
9384                                    errors.ECODE_INVAL)
9385
9386       if not self.dest_x509_ca_pem:
9387         raise errors.OpPrereqError("Missing destination X509 CA",
9388                                    errors.ECODE_INVAL)
9389
9390   def ExpandNames(self):
9391     self._ExpandAndLockInstance()
9392
9393     # Lock all nodes for local exports
9394     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9395       # FIXME: lock only instance primary and destination node
9396       #
9397       # Sad but true, for now we have do lock all nodes, as we don't know where
9398       # the previous export might be, and in this LU we search for it and
9399       # remove it from its current node. In the future we could fix this by:
9400       #  - making a tasklet to search (share-lock all), then create the
9401       #    new one, then one to remove, after
9402       #  - removing the removal operation altogether
9403       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9404
9405   def DeclareLocks(self, level):
9406     """Last minute lock declaration."""
9407     # All nodes are locked anyway, so nothing to do here.
9408
9409   def BuildHooksEnv(self):
9410     """Build hooks env.
9411
9412     This will run on the master, primary node and target node.
9413
9414     """
9415     env = {
9416       "EXPORT_MODE": self.op.mode,
9417       "EXPORT_NODE": self.op.target_node,
9418       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9419       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9420       # TODO: Generic function for boolean env variables
9421       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9422       }
9423
9424     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9425
9426     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9427
9428     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9429       nl.append(self.op.target_node)
9430
9431     return env, nl, nl
9432
9433   def CheckPrereq(self):
9434     """Check prerequisites.
9435
9436     This checks that the instance and node names are valid.
9437
9438     """
9439     instance_name = self.op.instance_name
9440
9441     self.instance = self.cfg.GetInstanceInfo(instance_name)
9442     assert self.instance is not None, \
9443           "Cannot retrieve locked instance %s" % self.op.instance_name
9444     _CheckNodeOnline(self, self.instance.primary_node)
9445
9446     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9447       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9448       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9449       assert self.dst_node is not None
9450
9451       _CheckNodeOnline(self, self.dst_node.name)
9452       _CheckNodeNotDrained(self, self.dst_node.name)
9453
9454       self._cds = None
9455       self.dest_disk_info = None
9456       self.dest_x509_ca = None
9457
9458     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9459       self.dst_node = None
9460
9461       if len(self.op.target_node) != len(self.instance.disks):
9462         raise errors.OpPrereqError(("Received destination information for %s"
9463                                     " disks, but instance %s has %s disks") %
9464                                    (len(self.op.target_node), instance_name,
9465                                     len(self.instance.disks)),
9466                                    errors.ECODE_INVAL)
9467
9468       cds = _GetClusterDomainSecret()
9469
9470       # Check X509 key name
9471       try:
9472         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9473       except (TypeError, ValueError), err:
9474         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9475
9476       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9477         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9478                                    errors.ECODE_INVAL)
9479
9480       # Load and verify CA
9481       try:
9482         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9483       except OpenSSL.crypto.Error, err:
9484         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9485                                    (err, ), errors.ECODE_INVAL)
9486
9487       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9488       if errcode is not None:
9489         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9490                                    (msg, ), errors.ECODE_INVAL)
9491
9492       self.dest_x509_ca = cert
9493
9494       # Verify target information
9495       disk_info = []
9496       for idx, disk_data in enumerate(self.op.target_node):
9497         try:
9498           (host, port, magic) = \
9499             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9500         except errors.GenericError, err:
9501           raise errors.OpPrereqError("Target info for disk %s: %s" %
9502                                      (idx, err), errors.ECODE_INVAL)
9503
9504         disk_info.append((host, port, magic))
9505
9506       assert len(disk_info) == len(self.op.target_node)
9507       self.dest_disk_info = disk_info
9508
9509     else:
9510       raise errors.ProgrammerError("Unhandled export mode %r" %
9511                                    self.op.mode)
9512
9513     # instance disk type verification
9514     # TODO: Implement export support for file-based disks
9515     for disk in self.instance.disks:
9516       if disk.dev_type == constants.LD_FILE:
9517         raise errors.OpPrereqError("Export not supported for instances with"
9518                                    " file-based disks", errors.ECODE_INVAL)
9519
9520   def _CleanupExports(self, feedback_fn):
9521     """Removes exports of current instance from all other nodes.
9522
9523     If an instance in a cluster with nodes A..D was exported to node C, its
9524     exports will be removed from the nodes A, B and D.
9525
9526     """
9527     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9528
9529     nodelist = self.cfg.GetNodeList()
9530     nodelist.remove(self.dst_node.name)
9531
9532     # on one-node clusters nodelist will be empty after the removal
9533     # if we proceed the backup would be removed because OpQueryExports
9534     # substitutes an empty list with the full cluster node list.
9535     iname = self.instance.name
9536     if nodelist:
9537       feedback_fn("Removing old exports for instance %s" % iname)
9538       exportlist = self.rpc.call_export_list(nodelist)
9539       for node in exportlist:
9540         if exportlist[node].fail_msg:
9541           continue
9542         if iname in exportlist[node].payload:
9543           msg = self.rpc.call_export_remove(node, iname).fail_msg
9544           if msg:
9545             self.LogWarning("Could not remove older export for instance %s"
9546                             " on node %s: %s", iname, node, msg)
9547
9548   def Exec(self, feedback_fn):
9549     """Export an instance to an image in the cluster.
9550
9551     """
9552     assert self.op.mode in constants.EXPORT_MODES
9553
9554     instance = self.instance
9555     src_node = instance.primary_node
9556
9557     if self.op.shutdown:
9558       # shutdown the instance, but not the disks
9559       feedback_fn("Shutting down instance %s" % instance.name)
9560       result = self.rpc.call_instance_shutdown(src_node, instance,
9561                                                self.op.shutdown_timeout)
9562       # TODO: Maybe ignore failures if ignore_remove_failures is set
9563       result.Raise("Could not shutdown instance %s on"
9564                    " node %s" % (instance.name, src_node))
9565
9566     # set the disks ID correctly since call_instance_start needs the
9567     # correct drbd minor to create the symlinks
9568     for disk in instance.disks:
9569       self.cfg.SetDiskID(disk, src_node)
9570
9571     activate_disks = (not instance.admin_up)
9572
9573     if activate_disks:
9574       # Activate the instance disks if we'exporting a stopped instance
9575       feedback_fn("Activating disks for %s" % instance.name)
9576       _StartInstanceDisks(self, instance, None)
9577
9578     try:
9579       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9580                                                      instance)
9581
9582       helper.CreateSnapshots()
9583       try:
9584         if (self.op.shutdown and instance.admin_up and
9585             not self.op.remove_instance):
9586           assert not activate_disks
9587           feedback_fn("Starting instance %s" % instance.name)
9588           result = self.rpc.call_instance_start(src_node, instance, None, None)
9589           msg = result.fail_msg
9590           if msg:
9591             feedback_fn("Failed to start instance: %s" % msg)
9592             _ShutdownInstanceDisks(self, instance)
9593             raise errors.OpExecError("Could not start instance: %s" % msg)
9594
9595         if self.op.mode == constants.EXPORT_MODE_LOCAL:
9596           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9597         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9598           connect_timeout = constants.RIE_CONNECT_TIMEOUT
9599           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9600
9601           (key_name, _, _) = self.x509_key_name
9602
9603           dest_ca_pem = \
9604             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9605                                             self.dest_x509_ca)
9606
9607           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9608                                                      key_name, dest_ca_pem,
9609                                                      timeouts)
9610       finally:
9611         helper.Cleanup()
9612
9613       # Check for backwards compatibility
9614       assert len(dresults) == len(instance.disks)
9615       assert compat.all(isinstance(i, bool) for i in dresults), \
9616              "Not all results are boolean: %r" % dresults
9617
9618     finally:
9619       if activate_disks:
9620         feedback_fn("Deactivating disks for %s" % instance.name)
9621         _ShutdownInstanceDisks(self, instance)
9622
9623     if not (compat.all(dresults) and fin_resu):
9624       failures = []
9625       if not fin_resu:
9626         failures.append("export finalization")
9627       if not compat.all(dresults):
9628         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9629                                if not dsk)
9630         failures.append("disk export: disk(s) %s" % fdsk)
9631
9632       raise errors.OpExecError("Export failed, errors in %s" %
9633                                utils.CommaJoin(failures))
9634
9635     # At this point, the export was successful, we can cleanup/finish
9636
9637     # Remove instance if requested
9638     if self.op.remove_instance:
9639       feedback_fn("Removing instance %s" % instance.name)
9640       _RemoveInstance(self, feedback_fn, instance,
9641                       self.op.ignore_remove_failures)
9642
9643     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9644       self._CleanupExports(feedback_fn)
9645
9646     return fin_resu, dresults
9647
9648
9649 class LURemoveExport(NoHooksLU):
9650   """Remove exports related to the named instance.
9651
9652   """
9653   _OP_PARAMS = [
9654     _PInstanceName,
9655     ]
9656   REQ_BGL = False
9657
9658   def ExpandNames(self):
9659     self.needed_locks = {}
9660     # We need all nodes to be locked in order for RemoveExport to work, but we
9661     # don't need to lock the instance itself, as nothing will happen to it (and
9662     # we can remove exports also for a removed instance)
9663     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9664
9665   def Exec(self, feedback_fn):
9666     """Remove any export.
9667
9668     """
9669     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9670     # If the instance was not found we'll try with the name that was passed in.
9671     # This will only work if it was an FQDN, though.
9672     fqdn_warn = False
9673     if not instance_name:
9674       fqdn_warn = True
9675       instance_name = self.op.instance_name
9676
9677     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9678     exportlist = self.rpc.call_export_list(locked_nodes)
9679     found = False
9680     for node in exportlist:
9681       msg = exportlist[node].fail_msg
9682       if msg:
9683         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9684         continue
9685       if instance_name in exportlist[node].payload:
9686         found = True
9687         result = self.rpc.call_export_remove(node, instance_name)
9688         msg = result.fail_msg
9689         if msg:
9690           logging.error("Could not remove export for instance %s"
9691                         " on node %s: %s", instance_name, node, msg)
9692
9693     if fqdn_warn and not found:
9694       feedback_fn("Export not found. If trying to remove an export belonging"
9695                   " to a deleted instance please use its Fully Qualified"
9696                   " Domain Name.")
9697
9698
9699 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9700   """Generic tags LU.
9701
9702   This is an abstract class which is the parent of all the other tags LUs.
9703
9704   """
9705
9706   def ExpandNames(self):
9707     self.needed_locks = {}
9708     if self.op.kind == constants.TAG_NODE:
9709       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9710       self.needed_locks[locking.LEVEL_NODE] = self.op.name
9711     elif self.op.kind == constants.TAG_INSTANCE:
9712       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9713       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9714
9715   def CheckPrereq(self):
9716     """Check prerequisites.
9717
9718     """
9719     if self.op.kind == constants.TAG_CLUSTER:
9720       self.target = self.cfg.GetClusterInfo()
9721     elif self.op.kind == constants.TAG_NODE:
9722       self.target = self.cfg.GetNodeInfo(self.op.name)
9723     elif self.op.kind == constants.TAG_INSTANCE:
9724       self.target = self.cfg.GetInstanceInfo(self.op.name)
9725     else:
9726       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9727                                  str(self.op.kind), errors.ECODE_INVAL)
9728
9729
9730 class LUGetTags(TagsLU):
9731   """Returns the tags of a given object.
9732
9733   """
9734   _OP_PARAMS = [
9735     ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9736     ("name", _NoDefault, _TNonEmptyString),
9737     ]
9738   REQ_BGL = False
9739
9740   def Exec(self, feedback_fn):
9741     """Returns the tag list.
9742
9743     """
9744     return list(self.target.GetTags())
9745
9746
9747 class LUSearchTags(NoHooksLU):
9748   """Searches the tags for a given pattern.
9749
9750   """
9751   _OP_PARAMS = [
9752     ("pattern", _NoDefault, _TNonEmptyString),
9753     ]
9754   REQ_BGL = False
9755
9756   def ExpandNames(self):
9757     self.needed_locks = {}
9758
9759   def CheckPrereq(self):
9760     """Check prerequisites.
9761
9762     This checks the pattern passed for validity by compiling it.
9763
9764     """
9765     try:
9766       self.re = re.compile(self.op.pattern)
9767     except re.error, err:
9768       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9769                                  (self.op.pattern, err), errors.ECODE_INVAL)
9770
9771   def Exec(self, feedback_fn):
9772     """Returns the tag list.
9773
9774     """
9775     cfg = self.cfg
9776     tgts = [("/cluster", cfg.GetClusterInfo())]
9777     ilist = cfg.GetAllInstancesInfo().values()
9778     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9779     nlist = cfg.GetAllNodesInfo().values()
9780     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9781     results = []
9782     for path, target in tgts:
9783       for tag in target.GetTags():
9784         if self.re.search(tag):
9785           results.append((path, tag))
9786     return results
9787
9788
9789 class LUAddTags(TagsLU):
9790   """Sets a tag on a given object.
9791
9792   """
9793   _OP_PARAMS = [
9794     ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9795     ("name", _NoDefault, _TNonEmptyString),
9796     ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9797     ]
9798   REQ_BGL = False
9799
9800   def CheckPrereq(self):
9801     """Check prerequisites.
9802
9803     This checks the type and length of the tag name and value.
9804
9805     """
9806     TagsLU.CheckPrereq(self)
9807     for tag in self.op.tags:
9808       objects.TaggableObject.ValidateTag(tag)
9809
9810   def Exec(self, feedback_fn):
9811     """Sets the tag.
9812
9813     """
9814     try:
9815       for tag in self.op.tags:
9816         self.target.AddTag(tag)
9817     except errors.TagError, err:
9818       raise errors.OpExecError("Error while setting tag: %s" % str(err))
9819     self.cfg.Update(self.target, feedback_fn)
9820
9821
9822 class LUDelTags(TagsLU):
9823   """Delete a list of tags from a given object.
9824
9825   """
9826   _OP_PARAMS = [
9827     ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9828     ("name", _NoDefault, _TNonEmptyString),
9829     ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9830     ]
9831   REQ_BGL = False
9832
9833   def CheckPrereq(self):
9834     """Check prerequisites.
9835
9836     This checks that we have the given tag.
9837
9838     """
9839     TagsLU.CheckPrereq(self)
9840     for tag in self.op.tags:
9841       objects.TaggableObject.ValidateTag(tag)
9842     del_tags = frozenset(self.op.tags)
9843     cur_tags = self.target.GetTags()
9844     if not del_tags <= cur_tags:
9845       diff_tags = del_tags - cur_tags
9846       diff_names = ["'%s'" % tag for tag in diff_tags]
9847       diff_names.sort()
9848       raise errors.OpPrereqError("Tag(s) %s not found" %
9849                                  (",".join(diff_names)), errors.ECODE_NOENT)
9850
9851   def Exec(self, feedback_fn):
9852     """Remove the tag from the object.
9853
9854     """
9855     for tag in self.op.tags:
9856       self.target.RemoveTag(tag)
9857     self.cfg.Update(self.target, feedback_fn)
9858
9859
9860 class LUTestDelay(NoHooksLU):
9861   """Sleep for a specified amount of time.
9862
9863   This LU sleeps on the master and/or nodes for a specified amount of
9864   time.
9865
9866   """
9867   _OP_PARAMS = [
9868     ("duration", _NoDefault, _TFloat),
9869     ("on_master", True, _TBool),
9870     ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9871     ("repeat", 0, _TPositiveInt)
9872     ]
9873   REQ_BGL = False
9874
9875   def ExpandNames(self):
9876     """Expand names and set required locks.
9877
9878     This expands the node list, if any.
9879
9880     """
9881     self.needed_locks = {}
9882     if self.op.on_nodes:
9883       # _GetWantedNodes can be used here, but is not always appropriate to use
9884       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9885       # more information.
9886       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9887       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9888
9889   def _TestDelay(self):
9890     """Do the actual sleep.
9891
9892     """
9893     if self.op.on_master:
9894       if not utils.TestDelay(self.op.duration):
9895         raise errors.OpExecError("Error during master delay test")
9896     if self.op.on_nodes:
9897       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9898       for node, node_result in result.items():
9899         node_result.Raise("Failure during rpc call to node %s" % node)
9900
9901   def Exec(self, feedback_fn):
9902     """Execute the test delay opcode, with the wanted repetitions.
9903
9904     """
9905     if self.op.repeat == 0:
9906       self._TestDelay()
9907     else:
9908       top_value = self.op.repeat - 1
9909       for i in range(self.op.repeat):
9910         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9911         self._TestDelay()
9912
9913
9914 class LUTestJobqueue(NoHooksLU):
9915   """Utility LU to test some aspects of the job queue.
9916
9917   """
9918   _OP_PARAMS = [
9919     ("notify_waitlock", False, _TBool),
9920     ("notify_exec", False, _TBool),
9921     ("log_messages", _EmptyList, _TListOf(_TString)),
9922     ("fail", False, _TBool),
9923     ]
9924   REQ_BGL = False
9925
9926   # Must be lower than default timeout for WaitForJobChange to see whether it
9927   # notices changed jobs
9928   _CLIENT_CONNECT_TIMEOUT = 20.0
9929   _CLIENT_CONFIRM_TIMEOUT = 60.0
9930
9931   @classmethod
9932   def _NotifyUsingSocket(cls, cb, errcls):
9933     """Opens a Unix socket and waits for another program to connect.
9934
9935     @type cb: callable
9936     @param cb: Callback to send socket name to client
9937     @type errcls: class
9938     @param errcls: Exception class to use for errors
9939
9940     """
9941     # Using a temporary directory as there's no easy way to create temporary
9942     # sockets without writing a custom loop around tempfile.mktemp and
9943     # socket.bind
9944     tmpdir = tempfile.mkdtemp()
9945     try:
9946       tmpsock = utils.PathJoin(tmpdir, "sock")
9947
9948       logging.debug("Creating temporary socket at %s", tmpsock)
9949       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
9950       try:
9951         sock.bind(tmpsock)
9952         sock.listen(1)
9953
9954         # Send details to client
9955         cb(tmpsock)
9956
9957         # Wait for client to connect before continuing
9958         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
9959         try:
9960           (conn, _) = sock.accept()
9961         except socket.error, err:
9962           raise errcls("Client didn't connect in time (%s)" % err)
9963       finally:
9964         sock.close()
9965     finally:
9966       # Remove as soon as client is connected
9967       shutil.rmtree(tmpdir)
9968
9969     # Wait for client to close
9970     try:
9971       try:
9972         # pylint: disable-msg=E1101
9973         # Instance of '_socketobject' has no ... member
9974         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
9975         conn.recv(1)
9976       except socket.error, err:
9977         raise errcls("Client failed to confirm notification (%s)" % err)
9978     finally:
9979       conn.close()
9980
9981   def _SendNotification(self, test, arg, sockname):
9982     """Sends a notification to the client.
9983
9984     @type test: string
9985     @param test: Test name
9986     @param arg: Test argument (depends on test)
9987     @type sockname: string
9988     @param sockname: Socket path
9989
9990     """
9991     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
9992
9993   def _Notify(self, prereq, test, arg):
9994     """Notifies the client of a test.
9995
9996     @type prereq: bool
9997     @param prereq: Whether this is a prereq-phase test
9998     @type test: string
9999     @param test: Test name
10000     @param arg: Test argument (depends on test)
10001
10002     """
10003     if prereq:
10004       errcls = errors.OpPrereqError
10005     else:
10006       errcls = errors.OpExecError
10007
10008     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10009                                                   test, arg),
10010                                    errcls)
10011
10012   def CheckArguments(self):
10013     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10014     self.expandnames_calls = 0
10015
10016   def ExpandNames(self):
10017     checkargs_calls = getattr(self, "checkargs_calls", 0)
10018     if checkargs_calls < 1:
10019       raise errors.ProgrammerError("CheckArguments was not called")
10020
10021     self.expandnames_calls += 1
10022
10023     if self.op.notify_waitlock:
10024       self._Notify(True, constants.JQT_EXPANDNAMES, None)
10025
10026     self.LogInfo("Expanding names")
10027
10028     # Get lock on master node (just to get a lock, not for a particular reason)
10029     self.needed_locks = {
10030       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10031       }
10032
10033   def Exec(self, feedback_fn):
10034     if self.expandnames_calls < 1:
10035       raise errors.ProgrammerError("ExpandNames was not called")
10036
10037     if self.op.notify_exec:
10038       self._Notify(False, constants.JQT_EXEC, None)
10039
10040     self.LogInfo("Executing")
10041
10042     if self.op.log_messages:
10043       for idx, msg in enumerate(self.op.log_messages):
10044         self.LogInfo("Sending log message %s", idx + 1)
10045         feedback_fn(constants.JQT_MSGPREFIX + msg)
10046         # Report how many test messages have been sent
10047         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10048
10049     if self.op.fail:
10050       raise errors.OpExecError("Opcode failure was requested")
10051
10052     return True
10053
10054
10055 class IAllocator(object):
10056   """IAllocator framework.
10057
10058   An IAllocator instance has three sets of attributes:
10059     - cfg that is needed to query the cluster
10060     - input data (all members of the _KEYS class attribute are required)
10061     - four buffer attributes (in|out_data|text), that represent the
10062       input (to the external script) in text and data structure format,
10063       and the output from it, again in two formats
10064     - the result variables from the script (success, info, nodes) for
10065       easy usage
10066
10067   """
10068   # pylint: disable-msg=R0902
10069   # lots of instance attributes
10070   _ALLO_KEYS = [
10071     "name", "mem_size", "disks", "disk_template",
10072     "os", "tags", "nics", "vcpus", "hypervisor",
10073     ]
10074   _RELO_KEYS = [
10075     "name", "relocate_from",
10076     ]
10077   _EVAC_KEYS = [
10078     "evac_nodes",
10079     ]
10080
10081   def __init__(self, cfg, rpc, mode, **kwargs):
10082     self.cfg = cfg
10083     self.rpc = rpc
10084     # init buffer variables
10085     self.in_text = self.out_text = self.in_data = self.out_data = None
10086     # init all input fields so that pylint is happy
10087     self.mode = mode
10088     self.mem_size = self.disks = self.disk_template = None
10089     self.os = self.tags = self.nics = self.vcpus = None
10090     self.hypervisor = None
10091     self.relocate_from = None
10092     self.name = None
10093     self.evac_nodes = None
10094     # computed fields
10095     self.required_nodes = None
10096     # init result fields
10097     self.success = self.info = self.result = None
10098     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10099       keyset = self._ALLO_KEYS
10100       fn = self._AddNewInstance
10101     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10102       keyset = self._RELO_KEYS
10103       fn = self._AddRelocateInstance
10104     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10105       keyset = self._EVAC_KEYS
10106       fn = self._AddEvacuateNodes
10107     else:
10108       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10109                                    " IAllocator" % self.mode)
10110     for key in kwargs:
10111       if key not in keyset:
10112         raise errors.ProgrammerError("Invalid input parameter '%s' to"
10113                                      " IAllocator" % key)
10114       setattr(self, key, kwargs[key])
10115
10116     for key in keyset:
10117       if key not in kwargs:
10118         raise errors.ProgrammerError("Missing input parameter '%s' to"
10119                                      " IAllocator" % key)
10120     self._BuildInputData(fn)
10121
10122   def _ComputeClusterData(self):
10123     """Compute the generic allocator input data.
10124
10125     This is the data that is independent of the actual operation.
10126
10127     """
10128     cfg = self.cfg
10129     cluster_info = cfg.GetClusterInfo()
10130     # cluster data
10131     data = {
10132       "version": constants.IALLOCATOR_VERSION,
10133       "cluster_name": cfg.GetClusterName(),
10134       "cluster_tags": list(cluster_info.GetTags()),
10135       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10136       # we don't have job IDs
10137       }
10138     iinfo = cfg.GetAllInstancesInfo().values()
10139     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10140
10141     # node data
10142     node_results = {}
10143     node_list = cfg.GetNodeList()
10144
10145     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10146       hypervisor_name = self.hypervisor
10147     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10148       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10149     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10150       hypervisor_name = cluster_info.enabled_hypervisors[0]
10151
10152     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10153                                         hypervisor_name)
10154     node_iinfo = \
10155       self.rpc.call_all_instances_info(node_list,
10156                                        cluster_info.enabled_hypervisors)
10157     for nname, nresult in node_data.items():
10158       # first fill in static (config-based) values
10159       ninfo = cfg.GetNodeInfo(nname)
10160       pnr = {
10161         "tags": list(ninfo.GetTags()),
10162         "primary_ip": ninfo.primary_ip,
10163         "secondary_ip": ninfo.secondary_ip,
10164         "offline": ninfo.offline,
10165         "drained": ninfo.drained,
10166         "master_candidate": ninfo.master_candidate,
10167         }
10168
10169       if not (ninfo.offline or ninfo.drained):
10170         nresult.Raise("Can't get data for node %s" % nname)
10171         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10172                                 nname)
10173         remote_info = nresult.payload
10174
10175         for attr in ['memory_total', 'memory_free', 'memory_dom0',
10176                      'vg_size', 'vg_free', 'cpu_total']:
10177           if attr not in remote_info:
10178             raise errors.OpExecError("Node '%s' didn't return attribute"
10179                                      " '%s'" % (nname, attr))
10180           if not isinstance(remote_info[attr], int):
10181             raise errors.OpExecError("Node '%s' returned invalid value"
10182                                      " for '%s': %s" %
10183                                      (nname, attr, remote_info[attr]))
10184         # compute memory used by primary instances
10185         i_p_mem = i_p_up_mem = 0
10186         for iinfo, beinfo in i_list:
10187           if iinfo.primary_node == nname:
10188             i_p_mem += beinfo[constants.BE_MEMORY]
10189             if iinfo.name not in node_iinfo[nname].payload:
10190               i_used_mem = 0
10191             else:
10192               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10193             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10194             remote_info['memory_free'] -= max(0, i_mem_diff)
10195
10196             if iinfo.admin_up:
10197               i_p_up_mem += beinfo[constants.BE_MEMORY]
10198
10199         # compute memory used by instances
10200         pnr_dyn = {
10201           "total_memory": remote_info['memory_total'],
10202           "reserved_memory": remote_info['memory_dom0'],
10203           "free_memory": remote_info['memory_free'],
10204           "total_disk": remote_info['vg_size'],
10205           "free_disk": remote_info['vg_free'],
10206           "total_cpus": remote_info['cpu_total'],
10207           "i_pri_memory": i_p_mem,
10208           "i_pri_up_memory": i_p_up_mem,
10209           }
10210         pnr.update(pnr_dyn)
10211
10212       node_results[nname] = pnr
10213     data["nodes"] = node_results
10214
10215     # instance data
10216     instance_data = {}
10217     for iinfo, beinfo in i_list:
10218       nic_data = []
10219       for nic in iinfo.nics:
10220         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10221         nic_dict = {"mac": nic.mac,
10222                     "ip": nic.ip,
10223                     "mode": filled_params[constants.NIC_MODE],
10224                     "link": filled_params[constants.NIC_LINK],
10225                    }
10226         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10227           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10228         nic_data.append(nic_dict)
10229       pir = {
10230         "tags": list(iinfo.GetTags()),
10231         "admin_up": iinfo.admin_up,
10232         "vcpus": beinfo[constants.BE_VCPUS],
10233         "memory": beinfo[constants.BE_MEMORY],
10234         "os": iinfo.os,
10235         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10236         "nics": nic_data,
10237         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10238         "disk_template": iinfo.disk_template,
10239         "hypervisor": iinfo.hypervisor,
10240         }
10241       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10242                                                  pir["disks"])
10243       instance_data[iinfo.name] = pir
10244
10245     data["instances"] = instance_data
10246
10247     self.in_data = data
10248
10249   def _AddNewInstance(self):
10250     """Add new instance data to allocator structure.
10251
10252     This in combination with _AllocatorGetClusterData will create the
10253     correct structure needed as input for the allocator.
10254
10255     The checks for the completeness of the opcode must have already been
10256     done.
10257
10258     """
10259     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10260
10261     if self.disk_template in constants.DTS_NET_MIRROR:
10262       self.required_nodes = 2
10263     else:
10264       self.required_nodes = 1
10265     request = {
10266       "name": self.name,
10267       "disk_template": self.disk_template,
10268       "tags": self.tags,
10269       "os": self.os,
10270       "vcpus": self.vcpus,
10271       "memory": self.mem_size,
10272       "disks": self.disks,
10273       "disk_space_total": disk_space,
10274       "nics": self.nics,
10275       "required_nodes": self.required_nodes,
10276       }
10277     return request
10278
10279   def _AddRelocateInstance(self):
10280     """Add relocate instance data to allocator structure.
10281
10282     This in combination with _IAllocatorGetClusterData will create the
10283     correct structure needed as input for the allocator.
10284
10285     The checks for the completeness of the opcode must have already been
10286     done.
10287
10288     """
10289     instance = self.cfg.GetInstanceInfo(self.name)
10290     if instance is None:
10291       raise errors.ProgrammerError("Unknown instance '%s' passed to"
10292                                    " IAllocator" % self.name)
10293
10294     if instance.disk_template not in constants.DTS_NET_MIRROR:
10295       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10296                                  errors.ECODE_INVAL)
10297
10298     if len(instance.secondary_nodes) != 1:
10299       raise errors.OpPrereqError("Instance has not exactly one secondary node",
10300                                  errors.ECODE_STATE)
10301
10302     self.required_nodes = 1
10303     disk_sizes = [{'size': disk.size} for disk in instance.disks]
10304     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10305
10306     request = {
10307       "name": self.name,
10308       "disk_space_total": disk_space,
10309       "required_nodes": self.required_nodes,
10310       "relocate_from": self.relocate_from,
10311       }
10312     return request
10313
10314   def _AddEvacuateNodes(self):
10315     """Add evacuate nodes data to allocator structure.
10316
10317     """
10318     request = {
10319       "evac_nodes": self.evac_nodes
10320       }
10321     return request
10322
10323   def _BuildInputData(self, fn):
10324     """Build input data structures.
10325
10326     """
10327     self._ComputeClusterData()
10328
10329     request = fn()
10330     request["type"] = self.mode
10331     self.in_data["request"] = request
10332
10333     self.in_text = serializer.Dump(self.in_data)
10334
10335   def Run(self, name, validate=True, call_fn=None):
10336     """Run an instance allocator and return the results.
10337
10338     """
10339     if call_fn is None:
10340       call_fn = self.rpc.call_iallocator_runner
10341
10342     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10343     result.Raise("Failure while running the iallocator script")
10344
10345     self.out_text = result.payload
10346     if validate:
10347       self._ValidateResult()
10348
10349   def _ValidateResult(self):
10350     """Process the allocator results.
10351
10352     This will process and if successful save the result in
10353     self.out_data and the other parameters.
10354
10355     """
10356     try:
10357       rdict = serializer.Load(self.out_text)
10358     except Exception, err:
10359       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10360
10361     if not isinstance(rdict, dict):
10362       raise errors.OpExecError("Can't parse iallocator results: not a dict")
10363
10364     # TODO: remove backwards compatiblity in later versions
10365     if "nodes" in rdict and "result" not in rdict:
10366       rdict["result"] = rdict["nodes"]
10367       del rdict["nodes"]
10368
10369     for key in "success", "info", "result":
10370       if key not in rdict:
10371         raise errors.OpExecError("Can't parse iallocator results:"
10372                                  " missing key '%s'" % key)
10373       setattr(self, key, rdict[key])
10374
10375     if not isinstance(rdict["result"], list):
10376       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10377                                " is not a list")
10378     self.out_data = rdict
10379
10380
10381 class LUTestAllocator(NoHooksLU):
10382   """Run allocator tests.
10383
10384   This LU runs the allocator tests
10385
10386   """
10387   _OP_PARAMS = [
10388     ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10389     ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10390     ("name", _NoDefault, _TNonEmptyString),
10391     ("nics", _NoDefault, _TOr(_TNone, _TListOf(
10392       _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10393                _TOr(_TNone, _TNonEmptyString))))),
10394     ("disks", _NoDefault, _TOr(_TNone, _TList)),
10395     ("hypervisor", None, _TMaybeString),
10396     ("allocator", None, _TMaybeString),
10397     ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
10398     ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
10399     ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
10400     ("os", None, _TMaybeString),
10401     ("disk_template", None, _TMaybeString),
10402     ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
10403     ]
10404
10405   def CheckPrereq(self):
10406     """Check prerequisites.
10407
10408     This checks the opcode parameters depending on the director and mode test.
10409
10410     """
10411     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10412       for attr in ["mem_size", "disks", "disk_template",
10413                    "os", "tags", "nics", "vcpus"]:
10414         if not hasattr(self.op, attr):
10415           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10416                                      attr, errors.ECODE_INVAL)
10417       iname = self.cfg.ExpandInstanceName(self.op.name)
10418       if iname is not None:
10419         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10420                                    iname, errors.ECODE_EXISTS)
10421       if not isinstance(self.op.nics, list):
10422         raise errors.OpPrereqError("Invalid parameter 'nics'",
10423                                    errors.ECODE_INVAL)
10424       if not isinstance(self.op.disks, list):
10425         raise errors.OpPrereqError("Invalid parameter 'disks'",
10426                                    errors.ECODE_INVAL)
10427       for row in self.op.disks:
10428         if (not isinstance(row, dict) or
10429             "size" not in row or
10430             not isinstance(row["size"], int) or
10431             "mode" not in row or
10432             row["mode"] not in ['r', 'w']):
10433           raise errors.OpPrereqError("Invalid contents of the 'disks'"
10434                                      " parameter", errors.ECODE_INVAL)
10435       if self.op.hypervisor is None:
10436         self.op.hypervisor = self.cfg.GetHypervisorType()
10437     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10438       fname = _ExpandInstanceName(self.cfg, self.op.name)
10439       self.op.name = fname
10440       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10441     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10442       if not hasattr(self.op, "evac_nodes"):
10443         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10444                                    " opcode input", errors.ECODE_INVAL)
10445     else:
10446       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10447                                  self.op.mode, errors.ECODE_INVAL)
10448
10449     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10450       if self.op.allocator is None:
10451         raise errors.OpPrereqError("Missing allocator name",
10452                                    errors.ECODE_INVAL)
10453     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10454       raise errors.OpPrereqError("Wrong allocator test '%s'" %
10455                                  self.op.direction, errors.ECODE_INVAL)
10456
10457   def Exec(self, feedback_fn):
10458     """Run the allocator test.
10459
10460     """
10461     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10462       ial = IAllocator(self.cfg, self.rpc,
10463                        mode=self.op.mode,
10464                        name=self.op.name,
10465                        mem_size=self.op.mem_size,
10466                        disks=self.op.disks,
10467                        disk_template=self.op.disk_template,
10468                        os=self.op.os,
10469                        tags=self.op.tags,
10470                        nics=self.op.nics,
10471                        vcpus=self.op.vcpus,
10472                        hypervisor=self.op.hypervisor,
10473                        )
10474     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10475       ial = IAllocator(self.cfg, self.rpc,
10476                        mode=self.op.mode,
10477                        name=self.op.name,
10478                        relocate_from=list(self.relocate_from),
10479                        )
10480     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10481       ial = IAllocator(self.cfg, self.rpc,
10482                        mode=self.op.mode,
10483                        evac_nodes=self.op.evac_nodes)
10484     else:
10485       raise errors.ProgrammerError("Uncatched mode %s in"
10486                                    " LUTestAllocator.Exec", self.op.mode)
10487
10488     if self.op.direction == constants.IALLOCATOR_DIR_IN:
10489       result = ial.in_text
10490     else:
10491       ial.Run(self.op.allocator, validate=False)
10492       result = ial.out_text
10493     return result