4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
31 import os
32 import os.path
33 import time
34 import re
35 import platform
36 import logging
37 import copy
38 import OpenSSL
40 from ganeti import ssh
41 from ganeti import utils
42 from ganeti import errors
43 from ganeti import hypervisor
44 from ganeti import locking
45 from ganeti import constants
46 from ganeti import objects
47 from ganeti import serializer
48 from ganeti import ssconf
49 from ganeti import uidpool
50 from ganeti import compat
51 from ganeti import masterd
53 import ganeti.masterd.instance # pylint: disable-msg=W0611
56 # Modifiable default values; need to define these here before the
57 # actual LUs
60 """Returns an empty list.
67 """Returns an empty dict.
75 """Checks if the given value is not None.
78 return val is not None
82 """Checks if the given value is None.
89 """Checks if the given value is a boolean.
92 return isinstance(val, bool)
96 """Checks if the given value is an integer.
99 return isinstance(val, int)
103 """Checks if the given value is a float.
106 return isinstance(val, float)
110 """Checks if the given value is a string.
113 return isinstance(val, basestring)
117 """Checks if a given value evaluates to a boolean True value.
123 def _TElemOf(target_list):
124 """Builds a function that checks if a given value is a member of a list.
127 return lambda val: val in target_list
132 """Checks if the given value is a list.
135 return isinstance(val, list)
139 """Checks if the given value is a dictionary.
142 return isinstance(val, dict)
147 """Combine multiple functions using an AND operation.
151 return compat.all(t(val) for t in args)
156 """Combine multiple functions using an AND operation.
160 return compat.any(t(val) for t in args)
167 _TNonEmptyString = _TAnd(_TString, _TTrue)
171 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
174 def _TListOf(my_type):
175 """Checks if a given value is a list with all elements of the same type.
179 lambda lst: compat.all(my_type(v) for v in lst))
182 def _TDictOf(key_type, val_type):
183 """Checks a dict type for the type of its key/values.
187 lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
188 and compat.all(val_type(v)
189 for v in my_dict.values())))
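# Illustrative sketch (editor's addition, not part of the original source):
# these small validators are meant to be composed into _OP_REQP entries.
# LUVerifyCluster further below declares, for example,
#
#   _OP_REQP = [
#     ("skip_checks", _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
#     ("verbose", _TBool),
#   ]
#
# and a dict-valued parameter could be described as something like
# _TDictOf(_TNonEmptyString, _TListOf(_TInt)).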
193 class LogicalUnit(object):
194 """Logical Unit base class.
196 Subclasses must follow these rules:
197 - implement ExpandNames
198 - implement CheckPrereq (except when tasklets are used)
199 - implement Exec (except when tasklets are used)
200 - implement BuildHooksEnv
201 - redefine HPATH and HTYPE
202 - optionally redefine their run requirements:
203 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
205 Note that all commands require root permissions.
207 @ivar dry_run_result: the value (if any) that will be returned to the caller
208 in dry-run mode (signalled by opcode dry_run parameter)
209 @cvar _OP_DEFS: a list of opcode attributes and the default values
210 they should get if not already existing
219 def __init__(self, processor, op, context, rpc):
220 """Constructor for LogicalUnit.
222 This needs to be overridden in derived classes in order to check op
223 validity.
226 self.proc = processor
227 self.op = op
228 self.cfg = context.cfg
229 self.context = context
230 self.rpc = rpc
231 # Dicts used to declare locking needs to mcpu
232 self.needed_locks = None
233 self.acquired_locks = {}
234 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
236 self.remove_locks = {}
237 # Used to force good behavior when calling helper functions
238 self.recalculate_locks = {}
241 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
242 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
243 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
244 # support for dry-run
245 self.dry_run_result = None
246 # support for generic debug attribute
247 if (not hasattr(self.op, "debug_level") or
248 not isinstance(self.op.debug_level, int)):
249 self.op.debug_level = 0
251 # Tasklets
252 self.tasklets = None
254 for aname, aval in self._OP_DEFS:
255 if not hasattr(self.op, aname):
256 if callable(aval):
257 dval = aval()
258 else:
259 dval = aval
260 setattr(self.op, aname, dval)
262 for attr_name, test in self._OP_REQP:
263 if not hasattr(op, attr_name):
264 raise errors.OpPrereqError("Required parameter '%s' missing" %
265 attr_name, errors.ECODE_INVAL)
266 attr_val = getattr(op, attr_name, None)
267 if not callable(test):
268 raise errors.ProgrammerError("Validation for parameter '%s' failed,"
269 " given type is not a proper type (%s)" %
271 if not test(attr_val):
272 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
273 self.op.OP_ID, attr_name, type(attr_val), attr_val)
274 raise errors.OpPrereqError("Parameter '%s' has invalid type" %
275 attr_name, errors.ECODE_INVAL)
277 self.CheckArguments()
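# Illustrative sketch (editor's addition): a subclass typically declares the
# parameters checked by the loop above as class attributes; "LUExample" and
# its parameters are hypothetical.
#
#   class LUExample(LogicalUnit):
#     _OP_REQP = [("verbose", _TBool)]
#     _OP_DEFS = [("verbose", False)]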
280 """Returns the SshRunner object
284 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
287 ssh = property(fget=__GetSSH)
289 def CheckArguments(self):
290 """Check syntactic validity for the opcode arguments.
292 This method is for doing a simple syntactic check and ensuring
293 validity of opcode parameters, without any cluster-related
294 checks. While the same can be accomplished in ExpandNames and/or
295 CheckPrereq, doing these separately is better because:
297 - ExpandNames is left as purely a lock-related function
298 - CheckPrereq is run after we have acquired locks (and possible
301 The function is allowed to change the self.op attribute so that
302 later methods can no longer worry about missing parameters.
307 def ExpandNames(self):
308 """Expand names for this LU.
310 This method is called before starting to execute the opcode, and it should
311 update all the parameters of the opcode to their canonical form (e.g. a
312 short node name must be fully expanded after this method has successfully
313 completed). This way locking, hooks, logging, etc. can work correctly.
315 LUs which implement this method must also populate the self.needed_locks
316 member, as a dict with lock levels as keys, and a list of needed lock names
319 - use an empty dict if you don't need any lock
320 - if you don't need any lock at a particular level omit that level
321 - don't put anything for the BGL level
322 - if you want all locks at a level use locking.ALL_SET as a value
324 If you need to share locks (rather than acquire them exclusively) at one
325 level you can modify self.share_locks, setting a true value (usually 1) for
326 that level. By default locks are not shared.
328 This function can also define a list of tasklets, which then will be
329 executed in order instead of the usual LU-level CheckPrereq and Exec
330 functions, if those are not defined by the LU.
334 # Acquire all nodes and one instance
335 self.needed_locks = {
336 locking.LEVEL_NODE: locking.ALL_SET,
337 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
339 # Acquire just two nodes
340 self.needed_locks = {
341 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
344 self.needed_locks = {} # No, you can't leave it to the default value None
347 # The implementation of this method is mandatory only if the new LU is
348 # concurrent, so that old LUs don't need to be changed all at the same
349 # time.
351 self.needed_locks = {} # Exclusive LUs don't need locks.
353 raise NotImplementedError
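# Illustrative sketch (editor's addition): a minimal ExpandNames for a
# hypothetical read-only LU that wants all node locks in shared mode.
#
#   def ExpandNames(self):
#     self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
#     self.share_locks[locking.LEVEL_NODE] = 1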
355 def DeclareLocks(self, level):
356 """Declare LU locking needs for a level
358 While most LUs can just declare their locking needs at ExpandNames time,
359 sometimes there's the need to calculate some locks after having acquired
360 the ones before. This function is called just before acquiring locks at a
361 particular level, but after acquiring the ones at lower levels, and permits
362 such calculations. It can be used to modify self.needed_locks, and by
363 default it does nothing.
365 This function is only called if you have something already set in
366 self.needed_locks for the level.
368 @param level: Locking level which is going to be locked
369 @type level: member of ganeti.locking.LEVELS
373 def CheckPrereq(self):
374 """Check prerequisites for this LU.
376 This method should check that the prerequisites for the execution
377 of this LU are fulfilled. It can do internode communication, but
378 it should be idempotent - no cluster or system changes are
379 allowed.
381 The method should raise errors.OpPrereqError in case something is
382 not fulfilled. Its return value is ignored.
384 This method should also update all the parameters of the opcode to
385 their canonical form if it hasn't been done by ExpandNames before.
388 if self.tasklets is not None:
389 for (idx, tl) in enumerate(self.tasklets):
390 logging.debug("Checking prerequisites for tasklet %s/%s",
391 idx + 1, len(self.tasklets))
392 tl.CheckPrereq()
393 else:
394 raise NotImplementedError
396 def Exec(self, feedback_fn):
397 """Execute the LU.
399 This method should implement the actual work. It should raise
400 errors.OpExecError for failures that are somewhat dealt with in
401 code, or expected.
404 if self.tasklets is not None:
405 for (idx, tl) in enumerate(self.tasklets):
406 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
407 tl.Exec(feedback_fn)
408 else:
409 raise NotImplementedError
411 def BuildHooksEnv(self):
412 """Build hooks environment for this LU.
414 This method should return a three-element tuple consisting of: a dict
415 containing the environment that will be used for running the
416 specific hook for this LU, a list of node names on which the hook
417 should run before the execution, and a list of node names on which
418 the hook should run after the execution.
420 The keys of the dict must not have 'GANETI_' prefixed as this will
421 be handled in the hooks runner. Also note additional keys will be
422 added by the hooks runner. If the LU doesn't define any
423 environment, an empty dict (and not None) should be returned.
425 "No nodes" should be returned as an empty list (and not None).
427 Note that if the HPATH for a LU class is None, this function will
428 not be called.
431 raise NotImplementedError
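# Illustrative sketch (editor's addition): a typical implementation returns
# the (env, pre_nodes, post_nodes) triple; compare LUPostInitCluster below,
# which does roughly
#
#   env = {"OP_TARGET": self.cfg.GetClusterName()}
#   mn = self.cfg.GetMasterNode()
#   return env, [], [mn]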
433 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
434 """Notify the LU about the results of its hooks.
436 This method is called every time a hooks phase is executed, and notifies
437 the Logical Unit about the hooks' result. The LU can then use it to alter
438 its result based on the hooks. By default the method does nothing and the
439 previous result is passed back unchanged but any LU can define it if it
440 wants to use the local cluster hook-scripts somehow.
442 @param phase: one of L{constants.HOOKS_PHASE_POST} or
443 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
444 @param hook_results: the results of the multi-node hooks rpc call
445 @param feedback_fn: function used to send feedback back to the caller
446 @param lu_result: the previous Exec result this LU had, or None
448 @return: the new Exec result, based on the previous result
452 # API must be kept, thus we ignore the "unused argument" and "could
453 # be a function" warnings
454 # pylint: disable-msg=W0613,R0201
457 def _ExpandAndLockInstance(self):
458 """Helper function to expand and lock an instance.
460 Many LUs that work on an instance take its name in self.op.instance_name
461 and need to expand it and then declare the expanded name for locking. This
462 function does it, and then updates self.op.instance_name to the expanded
463 name. It also initializes needed_locks as a dict, if this hasn't been done
464 before.
467 if self.needed_locks is None:
468 self.needed_locks = {}
470 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
471 "_ExpandAndLockInstance called with instance-level locks set"
472 self.op.instance_name = _ExpandInstanceName(self.cfg,
473 self.op.instance_name)
474 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
476 def _LockInstancesNodes(self, primary_only=False):
477 """Helper function to declare instances' nodes for locking.
479 This function should be called after locking one or more instances to lock
480 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
481 with all primary or secondary nodes for instances already locked and
482 present in self.needed_locks[locking.LEVEL_INSTANCE].
484 It should be called from DeclareLocks, and for safety only works if
485 self.recalculate_locks[locking.LEVEL_NODE] is set.
487 In the future it may grow parameters to just lock some instance's nodes, or
488 to just lock primaries or secondary nodes, if needed.
490 It should be called in DeclareLocks in a way similar to::
492 if level == locking.LEVEL_NODE:
493 self._LockInstancesNodes()
495 @type primary_only: boolean
496 @param primary_only: only lock primary nodes of locked instances
499 assert locking.LEVEL_NODE in self.recalculate_locks, \
500 "_LockInstancesNodes helper function called with no nodes to recalculate"
502 # TODO: check if we've really been called with the instance locks held
504 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
505 # future we might want to have different behaviors depending on the value
506 # of self.recalculate_locks[locking.LEVEL_NODE]
507 wanted_nodes = []
508 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
509 instance = self.context.cfg.GetInstanceInfo(instance_name)
510 wanted_nodes.append(instance.primary_node)
511 if not primary_only:
512 wanted_nodes.extend(instance.secondary_nodes)
514 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
515 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
516 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
517 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
519 del self.recalculate_locks[locking.LEVEL_NODE]
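# Illustrative sketch (editor's addition): the usual way an instance-level LU
# combines the two helpers above is to request a node-lock recalculation in
# ExpandNames and perform it in DeclareLocks.
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()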
522 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
523 """Simple LU which runs no hooks.
525 This LU is intended as a parent for other LogicalUnits which will
526 run no hooks, in order to reduce duplicate code.
532 def BuildHooksEnv(self):
533 """Empty BuildHooksEnv for NoHooksLu.
535 This just raises an error.
538 assert False, "BuildHooksEnv called for NoHooksLUs"
542 """Tasklet base class.
544 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
545 they can mix legacy code with tasklets. Locking needs to be done in the LU,
546 tasklets know nothing about locks.
548 Subclasses must follow these rules:
549 - Implement CheckPrereq
550 - Implement Exec
553 def __init__(self, lu):
560 def CheckPrereq(self):
561 """Check prerequisites for this tasklets.
563 This method should check whether the prerequisites for the execution of
564 this tasklet are fulfilled. It can do internode communication, but it
565 should be idempotent - no cluster or system changes are allowed.
567 The method should raise errors.OpPrereqError in case something is not
568 fulfilled. Its return value is ignored.
570 This method should also update all parameters to their canonical form if it
571 hasn't been done before.
576 def Exec(self, feedback_fn):
577 """Execute the tasklet.
579 This method should implement the actual work. It should raise
580 errors.OpExecError for failures that are somewhat dealt with in code, or
581 expected.
584 raise NotImplementedError
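# Illustrative sketch (editor's addition): a minimal tasklet following the
# rules above; "TLExample" is hypothetical and the base constructor is
# assumed to keep a reference to the owning LU as self.lu (its body is not
# shown in this excerpt).
#
#   class TLExample(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       self.instance_name = _ExpandInstanceName(self.lu.cfg,
#                                                self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Would operate on %s" % self.instance_name)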
587 def _GetWantedNodes(lu, nodes):
588 """Returns list of checked and expanded node names.
590 @type lu: L{LogicalUnit}
591 @param lu: the logical unit on whose behalf we execute
593 @param nodes: list of node names or None for all nodes
595 @return: the list of nodes, sorted
596 @raise errors.ProgrammerError: if the nodes parameter is wrong type
599 if not nodes:
600 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
601 " non-empty list of nodes whose name is to be expanded.")
603 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
604 return utils.NiceSort(wanted)
607 def _GetWantedInstances(lu, instances):
608 """Returns list of checked and expanded instance names.
610 @type lu: L{LogicalUnit}
611 @param lu: the logical unit on whose behalf we execute
612 @type instances: list
613 @param instances: list of instance names or None for all instances
615 @return: the list of instances, sorted
616 @raise errors.OpPrereqError: if the instances parameter is wrong type
617 @raise errors.OpPrereqError: if any of the passed instances is not found
620 if instances:
621 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
622 else:
623 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
624 return wanted
627 def _GetUpdatedParams(old_params, update_dict,
628 use_default=True, use_none=False):
629 """Return the new version of a parameter dictionary.
631 @type old_params: dict
632 @param old_params: old parameters
633 @type update_dict: dict
634 @param update_dict: dict containing new parameter values, or
635 constants.VALUE_DEFAULT to reset the parameter to its default
637 @type use_default: boolean
638 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
639 values as 'to be deleted' values
640 @type use_none: boolean
641 @param use_none: whether to recognise C{None} values as 'to be
642 deleted' values
644 @return: the new parameter dictionary
647 params_copy = copy.deepcopy(old_params)
648 for key, val in update_dict.iteritems():
649 if ((use_default and val == constants.VALUE_DEFAULT) or
650 (use_none and val is None)):
651 try:
652 del params_copy[key]
653 except KeyError:
654 pass
655 else:
656 params_copy[key] = val
657 return params_copy
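# Illustrative example (editor's addition, hypothetical values): resetting one
# parameter to its default while updating another.
#
#   _GetUpdatedParams({"memory": 128, "vcpus": 1},
#                     {"memory": constants.VALUE_DEFAULT, "vcpus": 2})
#   # -> {"vcpus": 2}  (the "memory" key is removed, i.e. reset to default)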
660 def _CheckOutputFields(static, dynamic, selected):
661 """Checks whether all selected fields are valid.
663 @type static: L{utils.FieldSet}
664 @param static: static fields set
665 @type dynamic: L{utils.FieldSet}
666 @param dynamic: dynamic fields set
669 f = utils.FieldSet()
670 f.Extend(static)
671 f.Extend(dynamic)
673 delta = f.NonMatching(selected)
674 if delta:
675 raise errors.OpPrereqError("Unknown output fields selected: %s"
676 % ",".join(delta), errors.ECODE_INVAL)
679 def _CheckBooleanOpField(op, name):
680 """Validates boolean opcode parameters.
682 This will ensure that an opcode parameter is either a boolean value,
683 or None (but that it always exists).
686 val = getattr(op, name, None)
687 if not (val is None or isinstance(val, bool)):
688 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
689 (name, str(val)), errors.ECODE_INVAL)
690 setattr(op, name, val)
693 def _CheckGlobalHvParams(params):
694 """Validates that given hypervisor params are not global ones.
696 This will ensure that instances don't get customised versions of
697 global parameters.
700 used_globals = constants.HVC_GLOBALS.intersection(params)
701 if used_globals:
702 msg = ("The following hypervisor parameters are global and cannot"
703 " be customized at instance level, please modify them at"
704 " cluster level: %s" % utils.CommaJoin(used_globals))
705 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
708 def _CheckNodeOnline(lu, node):
709 """Ensure that a given node is online.
711 @param lu: the LU on behalf of which we make the check
712 @param node: the node to check
713 @raise errors.OpPrereqError: if the node is offline
716 if lu.cfg.GetNodeInfo(node).offline:
717 raise errors.OpPrereqError("Can't use offline node %s" % node,
721 def _CheckNodeNotDrained(lu, node):
722 """Ensure that a given node is not drained.
724 @param lu: the LU on behalf of which we make the check
725 @param node: the node to check
726 @raise errors.OpPrereqError: if the node is drained
729 if lu.cfg.GetNodeInfo(node).drained:
730 raise errors.OpPrereqError("Can't use drained node %s" % node,
734 def _CheckNodeHasOS(lu, node, os_name, force_variant):
735 """Ensure that a node supports a given OS.
737 @param lu: the LU on behalf of which we make the check
738 @param node: the node to check
739 @param os_name: the OS to query about
740 @param force_variant: whether to ignore variant errors
741 @raise errors.OpPrereqError: if the node is not supporting the OS
744 result = lu.rpc.call_os_get(node, os_name)
745 result.Raise("OS '%s' not in supported OS list for node %s" %
747 prereq=True, ecode=errors.ECODE_INVAL)
748 if not force_variant:
749 _CheckOSVariant(result.payload, os_name)
752 def _RequireFileStorage():
753 """Checks that file storage is enabled.
755 @raise errors.OpPrereqError: when file storage is disabled
758 if not constants.ENABLE_FILE_STORAGE:
759 raise errors.OpPrereqError("File storage disabled at configure time",
763 def _CheckDiskTemplate(template):
764 """Ensure a given disk template is valid.
767 if template not in constants.DISK_TEMPLATES:
768 msg = ("Invalid disk template name '%s', valid templates are: %s" %
769 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
770 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
771 if template == constants.DT_FILE:
772 _RequireFileStorage()
775 def _CheckStorageType(storage_type):
776 """Ensure a given storage type is valid.
779 if storage_type not in constants.VALID_STORAGE_TYPES:
780 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
782 if storage_type == constants.ST_FILE:
783 _RequireFileStorage()
787 def _GetClusterDomainSecret():
788 """Reads the cluster domain secret.
791 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
795 def _CheckInstanceDown(lu, instance, reason):
796 """Ensure that an instance is not running."""
797 if instance.admin_up:
798 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
799 (instance.name, reason), errors.ECODE_STATE)
801 pnode = instance.primary_node
802 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
803 ins_l.Raise("Can't contact node %s for instance information" % pnode,
804 prereq=True, ecode=errors.ECODE_ENVIRON)
806 if instance.name in ins_l.payload:
807 raise errors.OpPrereqError("Instance %s is running, %s" %
808 (instance.name, reason), errors.ECODE_STATE)
811 def _ExpandItemName(fn, name, kind):
812 """Expand an item name.
814 @param fn: the function to use for expansion
815 @param name: requested item name
816 @param kind: text description ('Node' or 'Instance')
817 @return: the resolved (full) name
818 @raise errors.OpPrereqError: if the item is not found
821 full_name = fn(name)
822 if full_name is None:
823 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
828 def _ExpandNodeName(cfg, name):
829 """Wrapper over L{_ExpandItemName} for nodes."""
830 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
833 def _ExpandInstanceName(cfg, name):
834 """Wrapper over L{_ExpandItemName} for instance."""
835 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
838 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
839 memory, vcpus, nics, disk_template, disks,
840 bep, hvp, hypervisor_name):
841 """Builds instance related env variables for hooks
843 This builds the hook environment from individual variables.
846 @param name: the name of the instance
847 @type primary_node: string
848 @param primary_node: the name of the instance's primary node
849 @type secondary_nodes: list
850 @param secondary_nodes: list of secondary nodes as strings
851 @type os_type: string
852 @param os_type: the name of the instance's OS
853 @type status: boolean
854 @param status: the should_run status of the instance
856 @param memory: the memory size of the instance
858 @param vcpus: the count of VCPUs the instance has
860 @param nics: list of tuples (ip, mac, mode, link) representing
861 the NICs the instance has
862 @type disk_template: string
863 @param disk_template: the disk template of the instance
865 @param disks: the list of (size, mode) pairs
867 @param bep: the backend parameters for the instance
869 @param hvp: the hypervisor parameters for the instance
870 @type hypervisor_name: string
871 @param hypervisor_name: the hypervisor for the instance
873 @return: the hook environment for this instance
876 if status:
877 str_status = "up"
878 else:
879 str_status = "down"
880 env = {
881 "OP_TARGET": name,
882 "INSTANCE_NAME": name,
883 "INSTANCE_PRIMARY": primary_node,
884 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
885 "INSTANCE_OS_TYPE": os_type,
886 "INSTANCE_STATUS": str_status,
887 "INSTANCE_MEMORY": memory,
888 "INSTANCE_VCPUS": vcpus,
889 "INSTANCE_DISK_TEMPLATE": disk_template,
890 "INSTANCE_HYPERVISOR": hypervisor_name,
894 nic_count = len(nics)
895 for idx, (ip, mac, mode, link) in enumerate(nics):
898 env["INSTANCE_NIC%d_IP" % idx] = ip
899 env["INSTANCE_NIC%d_MAC" % idx] = mac
900 env["INSTANCE_NIC%d_MODE" % idx] = mode
901 env["INSTANCE_NIC%d_LINK" % idx] = link
902 if mode == constants.NIC_MODE_BRIDGED:
903 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
907 env["INSTANCE_NIC_COUNT"] = nic_count
910 disk_count = len(disks)
911 for idx, (size, mode) in enumerate(disks):
912 env["INSTANCE_DISK%d_SIZE" % idx] = size
913 env["INSTANCE_DISK%d_MODE" % idx] = mode
917 env["INSTANCE_DISK_COUNT"] = disk_count
919 for source, kind in [(bep, "BE"), (hvp, "HV")]:
920 for key, value in source.items():
921 env["INSTANCE_%s_%s" % (kind, key)] = value
926 def _NICListToTuple(lu, nics):
927 """Build a list of nic information tuples.
929 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
930 value in LUQueryInstanceData.
932 @type lu: L{LogicalUnit}
933 @param lu: the logical unit on whose behalf we execute
934 @type nics: list of L{objects.NIC}
935 @param nics: list of nics to convert to hooks tuples
939 cluster = lu.cfg.GetClusterInfo()
943 filled_params = cluster.SimpleFillNIC(nic.nicparams)
944 mode = filled_params[constants.NIC_MODE]
945 link = filled_params[constants.NIC_LINK]
946 hooks_nics.append((ip, mac, mode, link))
950 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
951 """Builds instance related env variables for hooks from an object.
953 @type lu: L{LogicalUnit}
954 @param lu: the logical unit on whose behalf we execute
955 @type instance: L{objects.Instance}
956 @param instance: the instance for which we should build the
959 @param override: dictionary with key/values that will override
962 @return: the hook environment dictionary
965 cluster = lu.cfg.GetClusterInfo()
966 bep = cluster.FillBE(instance)
967 hvp = cluster.FillHV(instance)
969 'name': instance.name,
970 'primary_node': instance.primary_node,
971 'secondary_nodes': instance.secondary_nodes,
972 'os_type': instance.os,
973 'status': instance.admin_up,
974 'memory': bep[constants.BE_MEMORY],
975 'vcpus': bep[constants.BE_VCPUS],
976 'nics': _NICListToTuple(lu, instance.nics),
977 'disk_template': instance.disk_template,
978 'disks': [(disk.size, disk.mode) for disk in instance.disks],
981 'hypervisor_name': instance.hypervisor,
984 args.update(override)
985 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
988 def _AdjustCandidatePool(lu, exceptions):
989 """Adjust the candidate pool after node operations.
992 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
993 if mod_list:
994 lu.LogInfo("Promoted nodes to master candidate role: %s",
995 utils.CommaJoin(node.name for node in mod_list))
996 for name in mod_list:
997 lu.context.ReaddNode(name)
998 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
999 if mc_now > mc_max:
1000 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1001 (mc_now, mc_max))
1004 def _DecideSelfPromotion(lu, exceptions=None):
1005 """Decide whether I should promote myself as a master candidate.
1008 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1009 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1010 # the new node will increase mc_max by one, so:
1011 mc_should = min(mc_should + 1, cp_size)
1012 return mc_now < mc_should
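# Illustrative example (editor's addition, hypothetical numbers): with a
# candidate_pool_size of 10, 3 current candidates and 3 desired, adding this
# node raises the desired count to min(3 + 1, 10) = 4 > 3, so the function
# returns True and the node promotes itself.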
1015 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1016 """Check that the brigdes needed by a list of nics exist.
1019 cluster = lu.cfg.GetClusterInfo()
1020 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1021 brlist = [params[constants.NIC_LINK] for params in paramslist
1022 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1023 if brlist:
1024 result = lu.rpc.call_bridges_exist(target_node, brlist)
1025 result.Raise("Error checking bridges on destination node '%s'" %
1026 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1029 def _CheckInstanceBridgesExist(lu, instance, node=None):
1030 """Check that the brigdes needed by an instance exist.
1034 node = instance.primary_node
1035 _CheckNicsBridgesExist(lu, instance.nics, node)
1038 def _CheckOSVariant(os_obj, name):
1039 """Check whether an OS name conforms to the os variants specification.
1041 @type os_obj: L{objects.OS}
1042 @param os_obj: OS object to check
1044 @param name: OS name passed by the user, to check for validity
1047 if not os_obj.supported_variants:
1048 return
1049 try:
1050 variant = name.split("+", 1)[1]
1051 except IndexError:
1052 raise errors.OpPrereqError("OS name must include a variant",
1053 errors.ECODE_INVAL)
1055 if variant not in os_obj.supported_variants:
1056 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
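# Illustrative example (editor's addition, hypothetical names): for an OS
# object whose supported_variants is ["default"], the name "dummyos+default"
# passes the checks above, plain "dummyos" raises "OS name must include a
# variant", and "dummyos+other" raises "Unsupported OS variant".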
1059 def _GetNodeInstancesInner(cfg, fn):
1060 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1063 def _GetNodeInstances(cfg, node_name):
1064 """Returns a list of all primary and secondary instances on a node.
1068 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1071 def _GetNodePrimaryInstances(cfg, node_name):
1072 """Returns primary instances on a node.
1075 return _GetNodeInstancesInner(cfg,
1076 lambda inst: node_name == inst.primary_node)
1079 def _GetNodeSecondaryInstances(cfg, node_name):
1080 """Returns secondary instances on a node.
1083 return _GetNodeInstancesInner(cfg,
1084 lambda inst: node_name in inst.secondary_nodes)
1087 def _GetStorageTypeArgs(cfg, storage_type):
1088 """Returns the arguments for a storage type.
1091 # Special case for file storage
1092 if storage_type == constants.ST_FILE:
1093 # storage.FileStorage wants a list of storage directories
1094 return [[cfg.GetFileStorageDir()]]
1099 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1102 for dev in instance.disks:
1103 cfg.SetDiskID(dev, node_name)
1105 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1106 result.Raise("Failed to get disk status from node %s" % node_name,
1107 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1109 for idx, bdev_status in enumerate(result.payload):
1110 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1116 class LUPostInitCluster(LogicalUnit):
1117 """Logical unit for running hooks after cluster initialization.
1120 HPATH = "cluster-init"
1121 HTYPE = constants.HTYPE_CLUSTER
1124 def BuildHooksEnv(self):
1128 env = {"OP_TARGET": self.cfg.GetClusterName()}
1129 mn = self.cfg.GetMasterNode()
1130 return env, [], [mn]
1132 def Exec(self, feedback_fn):
1139 class LUDestroyCluster(LogicalUnit):
1140 """Logical unit for destroying the cluster.
1143 HPATH = "cluster-destroy"
1144 HTYPE = constants.HTYPE_CLUSTER
1147 def BuildHooksEnv(self):
1151 env = {"OP_TARGET": self.cfg.GetClusterName()}
1154 def CheckPrereq(self):
1155 """Check prerequisites.
1157 This checks whether the cluster is empty.
1159 Any errors are signaled by raising errors.OpPrereqError.
1162 master = self.cfg.GetMasterNode()
1164 nodelist = self.cfg.GetNodeList()
1165 if len(nodelist) != 1 or nodelist[0] != master:
1166 raise errors.OpPrereqError("There are still %d node(s) in"
1167 " this cluster." % (len(nodelist) - 1),
1169 instancelist = self.cfg.GetInstanceList()
1170 if instancelist:
1171 raise errors.OpPrereqError("There are still %d instance(s) in"
1172 " this cluster." % len(instancelist),
1175 def Exec(self, feedback_fn):
1176 """Destroys the cluster.
1179 master = self.cfg.GetMasterNode()
1180 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1182 # Run post hooks on master node before it's removed
1183 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1184 try:
1185 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1186 except:
1187 # pylint: disable-msg=W0702
1188 self.LogWarning("Errors occurred running hooks on %s" % master)
1190 result = self.rpc.call_node_stop_master(master, False)
1191 result.Raise("Could not disable the master role")
1193 if modify_ssh_setup:
1194 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1195 utils.CreateBackup(priv_key)
1196 utils.CreateBackup(pub_key)
1201 def _VerifyCertificate(filename):
1202 """Verifies a certificate for LUVerifyCluster.
1204 @type filename: string
1205 @param filename: Path to PEM file
1208 try:
1209 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1210 utils.ReadFile(filename))
1211 except Exception, err: # pylint: disable-msg=W0703
1212 return (LUVerifyCluster.ETYPE_ERROR,
1213 "Failed to load X509 certificate %s: %s" % (filename, err))
1216 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1217 constants.SSL_CERT_EXPIRATION_ERROR)
1220 fnamemsg = "While verifying %s: %s" % (filename, msg)
1225 return (None, fnamemsg)
1226 elif errcode == utils.CERT_WARNING:
1227 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1228 elif errcode == utils.CERT_ERROR:
1229 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1231 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
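# Illustrative sketch (editor's addition): LUVerifyCluster.Exec below consumes
# the (errcode, msg) pair roughly as
#
#   (errcode, msg) = _VerifyCertificate(cert_filename)
#   _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)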
1234 class LUVerifyCluster(LogicalUnit):
1235 """Verifies the cluster status.
1238 HPATH = "cluster-verify"
1239 HTYPE = constants.HTYPE_CLUSTER
1241 ("skip_checks", _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1242 ("verbose", _TBool),
1243 ("error_codes", _TBool),
1244 ("debug_simulate_errors", _TBool),
1248 TCLUSTER = "cluster"
1249 TNODE = "node"
1250 TINSTANCE = "instance"
1252 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1253 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1254 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1255 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1256 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1257 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1259 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1260 ENODEDRBD = (TNODE, "ENODEDRBD")
1261 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1262 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1263 ENODEHV = (TNODE, "ENODEHV")
1264 ENODELVM = (TNODE, "ENODELVM")
1265 ENODEN1 = (TNODE, "ENODEN1")
1266 ENODENET = (TNODE, "ENODENET")
1267 ENODEOS = (TNODE, "ENODEOS")
1268 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1269 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1270 ENODERPC = (TNODE, "ENODERPC")
1271 ENODESSH = (TNODE, "ENODESSH")
1272 ENODEVERSION = (TNODE, "ENODEVERSION")
1273 ENODESETUP = (TNODE, "ENODESETUP")
1274 ENODETIME = (TNODE, "ENODETIME")
1276 ETYPE_FIELD = "code"
1277 ETYPE_ERROR = "ERROR"
1278 ETYPE_WARNING = "WARNING"
1280 class NodeImage(object):
1281 """A class representing the logical and physical status of a node.
1284 @ivar name: the node name to which this object refers
1285 @ivar volumes: a structure as returned from
1286 L{ganeti.backend.GetVolumeList} (runtime)
1287 @ivar instances: a list of running instances (runtime)
1288 @ivar pinst: list of configured primary instances (config)
1289 @ivar sinst: list of configured secondary instances (config)
1290 @ivar sbp: dictionary of {primary-node: list of instances} for all peers
1291 of this node (config)
1292 @ivar mfree: free memory, as reported by hypervisor (runtime)
1293 @ivar dfree: free disk, as reported by the node (runtime)
1294 @ivar offline: the offline status (config)
1295 @type rpc_fail: boolean
1296 @ivar rpc_fail: whether the RPC verify call failed (overall,
1297 not whether the individual keys were correct) (runtime)
1298 @type lvm_fail: boolean
1299 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1300 @type hyp_fail: boolean
1301 @ivar hyp_fail: whether the RPC call didn't return the instance list
1302 @type ghost: boolean
1303 @ivar ghost: whether this is a known node or not (config)
1304 @type os_fail: boolean
1305 @ivar os_fail: whether the RPC call didn't return valid OS data
1307 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1310 def __init__(self, offline=False, name=None):
1311 self.name = name
1312 self.volumes = {}
1313 self.instances = []
1314 self.pinst = []
1315 self.sinst = []
1316 self.sbp = {}
1317 self.mfree = 0
1318 self.dfree = 0
1319 self.offline = offline
1320 self.rpc_fail = False
1321 self.lvm_fail = False
1322 self.hyp_fail = False
1323 self.ghost = False
1324 self.os_fail = False
1325 self.oslist = {}
1327 def ExpandNames(self):
1328 self.needed_locks = {
1329 locking.LEVEL_NODE: locking.ALL_SET,
1330 locking.LEVEL_INSTANCE: locking.ALL_SET,
1332 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1334 def _Error(self, ecode, item, msg, *args, **kwargs):
1335 """Format an error message.
1337 Based on the opcode's error_codes parameter, either format a
1338 parseable error code, or a simpler error string.
1340 This must be called only from Exec and functions called from Exec.
1343 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1345 # first complete the msg
1348 # then format the whole message
1349 if self.op.error_codes:
1350 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1356 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1357 # and finally report it via the feedback_fn
1358 self._feedback_fn(" - %s" % msg)
1360 def _ErrorIf(self, cond, *args, **kwargs):
1361 """Log an error message if the passed condition is True.
1364 cond = bool(cond) or self.op.debug_simulate_errors
1365 if cond:
1366 self._Error(*args, **kwargs)
1367 # do not mark the operation as failed for WARN cases only
1368 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1369 self.bad = self.bad or cond
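# Illustrative sketch (editor's addition): the typical call pattern used by
# the verification helpers below is
#
#   _ErrorIf = self._ErrorIf
#   _ErrorIf(test, self.ENODERPC, node,
#            "unable to verify node: no data returned")
#
# where warning-only conditions additionally pass code=self.ETYPE_WARNING so
# that they do not mark the whole verification as failed.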
1371 def _VerifyNode(self, ninfo, nresult):
1372 """Run multiple tests against a node.
1376 - compares ganeti version
1377 - checks vg existence and size > 20G
1378 - checks config file checksum
1379 - checks ssh to other nodes
1381 @type ninfo: L{objects.Node}
1382 @param ninfo: the node to check
1383 @param nresult: the results from the node
1385 @return: whether overall this call was successful (and we can expect
1386 reasonable values in the response)
1390 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1392 # main result, nresult should be a non-empty dict
1393 test = not nresult or not isinstance(nresult, dict)
1394 _ErrorIf(test, self.ENODERPC, node,
1395 "unable to verify node: no data returned")
1399 # compares ganeti version
1400 local_version = constants.PROTOCOL_VERSION
1401 remote_version = nresult.get("version", None)
1402 test = not (remote_version and
1403 isinstance(remote_version, (list, tuple)) and
1404 len(remote_version) == 2)
1405 _ErrorIf(test, self.ENODERPC, node,
1406 "connection to node returned invalid data")
1410 test = local_version != remote_version[0]
1411 _ErrorIf(test, self.ENODEVERSION, node,
1412 "incompatible protocol versions: master %s,"
1413 " node %s", local_version, remote_version[0])
1417 # node seems compatible, we can actually try to look into its results
1419 # full package version
1420 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1421 self.ENODEVERSION, node,
1422 "software version mismatch: master %s, node %s",
1423 constants.RELEASE_VERSION, remote_version[1],
1424 code=self.ETYPE_WARNING)
1426 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1427 if isinstance(hyp_result, dict):
1428 for hv_name, hv_result in hyp_result.iteritems():
1429 test = hv_result is not None
1430 _ErrorIf(test, self.ENODEHV, node,
1431 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1434 test = nresult.get(constants.NV_NODESETUP,
1435 ["Missing NODESETUP results"])
1436 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1441 def _VerifyNodeTime(self, ninfo, nresult,
1442 nvinfo_starttime, nvinfo_endtime):
1443 """Check the node time.
1445 @type ninfo: L{objects.Node}
1446 @param ninfo: the node to check
1447 @param nresult: the remote results for the node
1448 @param nvinfo_starttime: the start time of the RPC call
1449 @param nvinfo_endtime: the end time of the RPC call
1453 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1455 ntime = nresult.get(constants.NV_TIME, None)
1456 try:
1457 ntime_merged = utils.MergeTime(ntime)
1458 except (ValueError, TypeError):
1459 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1462 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1463 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1464 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1465 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1466 else:
1467 ntime_diff = None
1469 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1470 "Node time diverges by at least %s from master node time",
1471 ntime_diff)
1473 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1474 """Check the node time.
1476 @type ninfo: L{objects.Node}
1477 @param ninfo: the node to check
1478 @param nresult: the remote results for the node
1479 @param vg_name: the configured VG name
1486 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1488 # checks vg existence and size > 20G
1489 vglist = nresult.get(constants.NV_VGLIST, None)
1490 test = vglist is None
1491 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1492 if not test:
1493 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1494 constants.MIN_VG_SIZE)
1495 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1498 pvlist = nresult.get(constants.NV_PVLIST, None)
1499 test = pvlist is None
1500 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1502 # check that ':' is not present in PV names, since it's a
1503 # special character for lvcreate (denotes the range of PEs to
1505 for _, pvname, owner_vg in pvlist:
1506 test = ":" in pvname
1507 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1508 " '%s' of VG '%s'", pvname, owner_vg)
1510 def _VerifyNodeNetwork(self, ninfo, nresult):
1511 """Check the node time.
1513 @type ninfo: L{objects.Node}
1514 @param ninfo: the node to check
1515 @param nresult: the remote results for the node
1519 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1521 test = constants.NV_NODELIST not in nresult
1522 _ErrorIf(test, self.ENODESSH, node,
1523 "node hasn't returned node ssh connectivity data")
1525 if nresult[constants.NV_NODELIST]:
1526 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1527 _ErrorIf(True, self.ENODESSH, node,
1528 "ssh communication with node '%s': %s", a_node, a_msg)
1530 test = constants.NV_NODENETTEST not in nresult
1531 _ErrorIf(test, self.ENODENET, node,
1532 "node hasn't returned node tcp connectivity data")
1534 if nresult[constants.NV_NODENETTEST]:
1535 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1536 for anode in nlist:
1537 _ErrorIf(True, self.ENODENET, node,
1538 "tcp communication with node '%s': %s",
1539 anode, nresult[constants.NV_NODENETTEST][anode])
1541 test = constants.NV_MASTERIP not in nresult
1542 _ErrorIf(test, self.ENODENET, node,
1543 "node hasn't returned node master IP reachability data")
1545 if not nresult[constants.NV_MASTERIP]:
1546 if node == self.master_node:
1547 msg = "the master node cannot reach the master IP (not configured?)"
1549 msg = "cannot reach the master IP"
1550 _ErrorIf(True, self.ENODENET, node, msg)
1553 def _VerifyInstance(self, instance, instanceconfig, node_image):
1554 """Verify an instance.
1556 This function checks to see if the required block devices are
1557 available on the instance's node.
1560 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1561 node_current = instanceconfig.primary_node
1563 node_vol_should = {}
1564 instanceconfig.MapLVsByNode(node_vol_should)
1566 for node in node_vol_should:
1567 n_img = node_image[node]
1568 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1569 # ignore missing volumes on offline or broken nodes
1570 continue
1571 for volume in node_vol_should[node]:
1572 test = volume not in n_img.volumes
1573 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1574 "volume %s missing on node %s", volume, node)
1576 if instanceconfig.admin_up:
1577 pri_img = node_image[node_current]
1578 test = instance not in pri_img.instances and not pri_img.offline
1579 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1580 "instance not running on its primary node %s",
1583 for node, n_img in node_image.items():
1584 if (not node == node_current):
1585 test = instance in n_img.instances
1586 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1587 "instance should not run on node %s", node)
1589 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1590 """Verify if there are any unknown volumes in the cluster.
1592 The .os, .swap and backup volumes are ignored. All other volumes are
1593 reported as unknown.
1596 for node, n_img in node_image.items():
1597 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1598 # skip non-healthy nodes
1599 continue
1600 for volume in n_img.volumes:
1601 test = (node not in node_vol_should or
1602 volume not in node_vol_should[node])
1603 self._ErrorIf(test, self.ENODEORPHANLV, node,
1604 "volume %s is unknown", volume)
1606 def _VerifyOrphanInstances(self, instancelist, node_image):
1607 """Verify the list of running instances.
1609 This checks what instances are running but unknown to the cluster.
1612 for node, n_img in node_image.items():
1613 for o_inst in n_img.instances:
1614 test = o_inst not in instancelist
1615 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1616 "instance %s on node %s should not exist", o_inst, node)
1618 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1619 """Verify N+1 Memory Resilience.
1621 Check that if one single node dies we can still start all the
1622 instances it was primary for.
1625 for node, n_img in node_image.items():
1626 # This code checks that every node which is now listed as
1627 # secondary has enough memory to host all instances it is
1628 # supposed to should a single other node in the cluster fail.
1629 # FIXME: not ready for failover to an arbitrary node
1630 # FIXME: does not support file-backed instances
1631 # WARNING: we currently take into account down instances as well
1632 # as up ones, considering that even if they're down someone
1633 # might want to start them even in the event of a node failure.
1634 for prinode, instances in n_img.sbp.items():
1635 needed_mem = 0
1636 for instance in instances:
1637 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1638 if bep[constants.BE_AUTO_BALANCE]:
1639 needed_mem += bep[constants.BE_MEMORY]
1640 test = n_img.mfree < needed_mem
1641 self._ErrorIf(test, self.ENODEN1, node,
1642 "not enough memory on to accommodate"
1643 " failovers should peer node %s fail", prinode)
1645 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1646 master_files):
1647 """Verifies and computes the node required file checksums.
1649 @type ninfo: L{objects.Node}
1650 @param ninfo: the node to check
1651 @param nresult: the remote results for the node
1652 @param file_list: required list of files
1653 @param local_cksum: dictionary of local files and their checksums
1654 @param master_files: list of files that only masters should have
1658 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1660 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1661 test = not isinstance(remote_cksum, dict)
1662 _ErrorIf(test, self.ENODEFILECHECK, node,
1663 "node hasn't returned file checksum data")
1667 for file_name in file_list:
1668 node_is_mc = ninfo.master_candidate
1669 must_have = (file_name not in master_files) or node_is_mc
1671 test1 = file_name not in remote_cksum
1673 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1675 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1676 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1677 "file '%s' missing", file_name)
1678 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1679 "file '%s' has wrong checksum", file_name)
1680 # not candidate and this is not a must-have file
1681 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1682 "file '%s' should not exist on non master"
1683 " candidates (and the file is outdated)", file_name)
1684 # all good, except non-master/non-must have combination
1685 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1686 "file '%s' should not exist"
1687 " on non master candidates", file_name)
1689 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1690 """Verifies and the node DRBD status.
1692 @type ninfo: L{objects.Node}
1693 @param ninfo: the node to check
1694 @param nresult: the remote results for the node
1695 @param instanceinfo: the dict of instances
1696 @param drbd_map: the DRBD map as returned by
1697 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1701 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1703 # compute the DRBD minors
1704 node_drbd = {}
1705 for minor, instance in drbd_map[node].items():
1706 test = instance not in instanceinfo
1707 _ErrorIf(test, self.ECLUSTERCFG, None,
1708 "ghost instance '%s' in temporary DRBD map", instance)
1709 # ghost instance should not be running, but otherwise we
1710 # don't give double warnings (both ghost instance and
1711 # unallocated minor in use)
1712 if test:
1713 node_drbd[minor] = (instance, False)
1714 else:
1715 instance = instanceinfo[instance]
1716 node_drbd[minor] = (instance.name, instance.admin_up)
1718 # and now check them
1719 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1720 test = not isinstance(used_minors, (tuple, list))
1721 _ErrorIf(test, self.ENODEDRBD, node,
1722 "cannot parse drbd status file: %s", str(used_minors))
1724 # we cannot check drbd status
1727 for minor, (iname, must_exist) in node_drbd.items():
1728 test = minor not in used_minors and must_exist
1729 _ErrorIf(test, self.ENODEDRBD, node,
1730 "drbd minor %d of instance %s is not active", minor, iname)
1731 for minor in used_minors:
1732 test = minor not in node_drbd
1733 _ErrorIf(test, self.ENODEDRBD, node,
1734 "unallocated drbd minor %d is in use", minor)
1736 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1737 """Builds the node OS structures.
1739 @type ninfo: L{objects.Node}
1740 @param ninfo: the node to check
1741 @param nresult: the remote results for the node
1742 @param nimg: the node image object
1746 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1748 remote_os = nresult.get(constants.NV_OSLIST, None)
1749 test = (not isinstance(remote_os, list) or
1750 not compat.all(isinstance(v, list) and len(v) == 7
1751 for v in remote_os))
1753 _ErrorIf(test, self.ENODEOS, node,
1754 "node hasn't returned valid OS data")
1763 for (name, os_path, status, diagnose,
1764 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1766 if name not in os_dict:
1769 # parameters is a list of lists instead of list of tuples due to
1770 # JSON lacking a real tuple type, fix it:
1771 parameters = [tuple(v) for v in parameters]
1772 os_dict[name].append((os_path, status, diagnose,
1773 set(variants), set(parameters), set(api_ver)))
1775 nimg.oslist = os_dict
1777 def _VerifyNodeOS(self, ninfo, nimg, base):
1778 """Verifies the node OS list.
1780 @type ninfo: L{objects.Node}
1781 @param ninfo: the node to check
1782 @param nimg: the node image object
1783 @param base: the 'template' node we match against (e.g. from the master)
1787 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1789 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1791 for os_name, os_data in nimg.oslist.items():
1792 assert os_data, "Empty OS status for OS %s?!" % os_name
1793 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1794 _ErrorIf(not f_status, self.ENODEOS, node,
1795 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1796 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1797 "OS '%s' has multiple entries (first one shadows the rest): %s",
1798 os_name, utils.CommaJoin([v[0] for v in os_data]))
1799 # this will be caught in the backend too
1800 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1801 and not f_var, self.ENODEOS, node,
1802 "OS %s with API at least %d does not declare any variant",
1803 os_name, constants.OS_API_V15)
1804 # comparisons with the 'base' image
1805 test = os_name not in base.oslist
1806 _ErrorIf(test, self.ENODEOS, node,
1807 "Extra OS %s not present on reference node (%s)",
1811 assert base.oslist[os_name], "Base node has empty OS status?"
1812 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1813 if not b_status:
1814 # base OS is invalid, skipping
1815 continue
1816 for kind, a, b in [("API version", f_api, b_api),
1817 ("variants list", f_var, b_var),
1818 ("parameters", f_param, b_param)]:
1819 _ErrorIf(a != b, self.ENODEOS, node,
1820 "OS %s %s differs from reference node %s: %s vs. %s",
1821 kind, os_name, base.name,
1822 utils.CommaJoin(a), utils.CommaJoin(b))
1824 # check any missing OSes
1825 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1826 _ErrorIf(missing, self.ENODEOS, node,
1827 "OSes present on reference node %s but missing on this node: %s",
1828 base.name, utils.CommaJoin(missing))
1830 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1831 """Verifies and updates the node volume data.
1833 This function will update a L{NodeImage}'s internal structures
1834 with data from the remote call.
1836 @type ninfo: L{objects.Node}
1837 @param ninfo: the node to check
1838 @param nresult: the remote results for the node
1839 @param nimg: the node image object
1840 @param vg_name: the configured VG name
1844 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1846 nimg.lvm_fail = True
1847 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1848 if vg_name is None:
1849 pass
1850 elif isinstance(lvdata, basestring):
1851 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1852 utils.SafeEncode(lvdata))
1853 elif not isinstance(lvdata, dict):
1854 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1856 nimg.volumes = lvdata
1857 nimg.lvm_fail = False
1859 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1860 """Verifies and updates the node instance list.
1862 If the listing was successful, then updates this node's instance
1863 list. Otherwise, it marks the RPC call as failed for the instance
1866 @type ninfo: L{objects.Node}
1867 @param ninfo: the node to check
1868 @param nresult: the remote results for the node
1869 @param nimg: the node image object
1872 idata = nresult.get(constants.NV_INSTANCELIST, None)
1873 test = not isinstance(idata, list)
1874 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1875 " (instancelist): %s", utils.SafeEncode(str(idata)))
1877 nimg.hyp_fail = True
1879 nimg.instances = idata
1881 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1882 """Verifies and computes a node information map
1884 @type ninfo: L{objects.Node}
1885 @param ninfo: the node to check
1886 @param nresult: the remote results for the node
1887 @param nimg: the node image object
1888 @param vg_name: the configured VG name
1892 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1894 # try to read free memory (from the hypervisor)
1895 hv_info = nresult.get(constants.NV_HVINFO, None)
1896 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1897 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1898 if not test:
1899 try:
1900 nimg.mfree = int(hv_info["memory_free"])
1901 except (ValueError, TypeError):
1902 _ErrorIf(True, self.ENODERPC, node,
1903 "node returned invalid nodeinfo, check hypervisor")
1905 # FIXME: devise a free space model for file based instances as well
1906 if vg_name is not None:
1907 test = (constants.NV_VGLIST not in nresult or
1908 vg_name not in nresult[constants.NV_VGLIST])
1909 _ErrorIf(test, self.ENODELVM, node,
1910 "node didn't return data for the volume group '%s'"
1911 " - it is either missing or broken", vg_name)
1914 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1915 except (ValueError, TypeError):
1916 _ErrorIf(True, self.ENODERPC, node,
1917 "node returned invalid LVM info, check LVM status")
1919 def BuildHooksEnv(self):
1922 Cluster-Verify hooks are run only in the post phase; if they fail, their
1923 output is logged in the verify output and the verification fails.
1926 all_nodes = self.cfg.GetNodeList()
1927 env = {
1928 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1929 }
1930 for node in self.cfg.GetAllNodesInfo().values():
1931 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1933 return env, [], all_nodes
1935 def Exec(self, feedback_fn):
1936 """Verify integrity of cluster, performing various test on nodes.
1940 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1941 verbose = self.op.verbose
1942 self._feedback_fn = feedback_fn
1943 feedback_fn("* Verifying global settings")
1944 for msg in self.cfg.VerifyConfig():
1945 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1947 # Check the cluster certificates
1948 for cert_filename in constants.ALL_CERT_FILES:
1949 (errcode, msg) = _VerifyCertificate(cert_filename)
1950 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1952 vg_name = self.cfg.GetVGName()
1953 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1954 cluster = self.cfg.GetClusterInfo()
1955 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1956 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1957 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1958 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1959 for iname in instancelist)
1960 i_non_redundant = [] # Non redundant instances
1961 i_non_a_balanced = [] # Non auto-balanced instances
1962 n_offline = 0 # Count of offline nodes
1963 n_drained = 0 # Count of nodes being drained
1964 node_vol_should = {}
1966 # FIXME: verify OS list
1967 # do local checksums
1968 master_files = [constants.CLUSTER_CONF_FILE]
1969 master_node = self.master_node = self.cfg.GetMasterNode()
1970 master_ip = self.cfg.GetMasterIP()
1972 file_names = ssconf.SimpleStore().GetFileList()
1973 file_names.extend(constants.ALL_CERT_FILES)
1974 file_names.extend(master_files)
1975 if cluster.modify_etc_hosts:
1976 file_names.append(constants.ETC_HOSTS)
1978 local_checksums = utils.FingerprintFiles(file_names)
1980 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1981 node_verify_param = {
1982 constants.NV_FILELIST: file_names,
1983 constants.NV_NODELIST: [node.name for node in nodeinfo
1984 if not node.offline],
1985 constants.NV_HYPERVISOR: hypervisors,
1986 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1987 node.secondary_ip) for node in nodeinfo
1988 if not node.offline],
1989 constants.NV_INSTANCELIST: hypervisors,
1990 constants.NV_VERSION: None,
1991 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1992 constants.NV_NODESETUP: None,
1993 constants.NV_TIME: None,
1994 constants.NV_MASTERIP: (master_node, master_ip),
1995 constants.NV_OSLIST: None,
1998 if vg_name is not None:
1999 node_verify_param[constants.NV_VGLIST] = None
2000 node_verify_param[constants.NV_LVLIST] = vg_name
2001 node_verify_param[constants.NV_PVLIST] = [vg_name]
2002 node_verify_param[constants.NV_DRBDLIST] = None
2004 # Build our expected cluster state
2005 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2007 for node in nodeinfo)
2009 for instance in instancelist:
2010 inst_config = instanceinfo[instance]
2012 for nname in inst_config.all_nodes:
2013 if nname not in node_image:
2015 gnode = self.NodeImage(name=nname)
2017 node_image[nname] = gnode
2019 inst_config.MapLVsByNode(node_vol_should)
2021 pnode = inst_config.primary_node
2022 node_image[pnode].pinst.append(instance)
2024 for snode in inst_config.secondary_nodes:
2025 nimg = node_image[snode]
2026 nimg.sinst.append(instance)
2027 if pnode not in nimg.sbp:
2028 nimg.sbp[pnode] = []
2029 nimg.sbp[pnode].append(instance)
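# Sketch of the expected-state structures built above (instance and node
# names are hypothetical): node_image["nodeB"].pinst lists instances whose
# primary is nodeB, node_image["nodeB"].sinst lists instances for which it
# is a secondary, and node_image["nodeB"].sbp == {"nodeA": ["inst1"]} means
# nodeB holds the secondary copy of inst1 whose primary node is nodeA.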
2031 # At this point, we have the in-memory data structures complete,
2032 # except for the runtime information, which we'll gather next
2034 # Due to the way our RPC system works, exact response times cannot be
2035 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2036 time before and after executing the request, we can at least have a time window.
2038 nvinfo_starttime = time.time()
2039 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2040 self.cfg.GetClusterName())
2041 nvinfo_endtime = time.time()
2043 all_drbd_map = self.cfg.ComputeDRBDMap()
2045 feedback_fn("* Verifying node status")
2049 for node_i in nodeinfo:
2051 nimg = node_image[node]
2055 feedback_fn("* Skipping offline node %s" % (node,))
2059 if node == master_node:
2061 elif node_i.master_candidate:
2062 ntype = "master candidate"
2063 elif node_i.drained:
2069 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2071 msg = all_nvinfo[node].fail_msg
2072 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2074 nimg.rpc_fail = True
2077 nresult = all_nvinfo[node].payload
2079 nimg.call_ok = self._VerifyNode(node_i, nresult)
2080 self._VerifyNodeNetwork(node_i, nresult)
2081 self._VerifyNodeLVM(node_i, nresult, vg_name)
2082 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2084 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
2085 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2087 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2088 self._UpdateNodeInstances(node_i, nresult, nimg)
2089 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2090 self._UpdateNodeOS(node_i, nresult, nimg)
2091 if not nimg.os_fail:
2092 if refos_img is None:
2094 self._VerifyNodeOS(node_i, nimg, refos_img)
2096 feedback_fn("* Verifying instance status")
2097 for instance in instancelist:
2099 feedback_fn("* Verifying instance %s" % instance)
2100 inst_config = instanceinfo[instance]
2101 self._VerifyInstance(instance, inst_config, node_image)
2102 inst_nodes_offline = []
2104 pnode = inst_config.primary_node
2105 pnode_img = node_image[pnode]
2106 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2107 self.ENODERPC, pnode, "instance %s, connection to"
2108 " primary node failed", instance)
2110 if pnode_img.offline:
2111 inst_nodes_offline.append(pnode)
2113 # If the instance is non-redundant we cannot survive losing its primary
2114 # node, so we are not N+1 compliant. On the other hand we have no disk
2115 templates with more than one secondary, so that situation is not well handled currently.
2117 # FIXME: does not support file-backed instances
2118 if not inst_config.secondary_nodes:
2119 i_non_redundant.append(instance)
2120 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2121 instance, "instance has multiple secondary nodes: %s",
2122 utils.CommaJoin(inst_config.secondary_nodes),
2123 code=self.ETYPE_WARNING)
2125 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2126 i_non_a_balanced.append(instance)
2128 for snode in inst_config.secondary_nodes:
2129 s_img = node_image[snode]
2130 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2131 "instance %s, connection to secondary node failed", instance)
2134 inst_nodes_offline.append(snode)
2136 # warn that the instance lives on offline nodes
2137 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2138 "instance lives on offline node(s) %s",
2139 utils.CommaJoin(inst_nodes_offline))
2140 # ... or ghost nodes
2141 for node in inst_config.all_nodes:
2142 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2143 "instance lives on ghost node %s", node)
2145 feedback_fn("* Verifying orphan volumes")
2146 self._VerifyOrphanVolumes(node_vol_should, node_image)
2148 feedback_fn("* Verifying orphan instances")
2149 self._VerifyOrphanInstances(instancelist, node_image)
2151 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2152 feedback_fn("* Verifying N+1 Memory redundancy")
2153 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2155 feedback_fn("* Other Notes")
2157 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2158 % len(i_non_redundant))
2160 if i_non_a_balanced:
2161 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2162 % len(i_non_a_balanced))
2165 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2168 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2172 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2173 """Analyze the post-hooks' result
2175 This method analyzes the hook result, handles it, and sends some
2176 nicely-formatted feedback back to the user.
2178 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2179 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2180 @param hooks_results: the results of the multi-node hooks rpc call
2181 @param feedback_fn: function used to send feedback back to the caller
2182 @param lu_result: previous Exec result
2183 @return: the new Exec result, based on the previous result
2187 # We only really run POST phase hooks, and are only interested in their results
2189 if phase == constants.HOOKS_PHASE_POST:
2190 # Used to change hooks' output to proper indentation
2191 indent_re = re.compile('^', re.M)
2192 feedback_fn("* Hooks Results")
2193 assert hooks_results, "invalid result from hooks"
2195 for node_name in hooks_results:
2196 res = hooks_results[node_name]
2198 test = msg and not res.offline
2199 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2200 "Communication failure in hooks execution: %s", msg)
2201 if res.offline or msg:
2202 # No need to investigate payload if node is offline or gave an error.
2203 # manually override lu_result here, as _ErrorIf only
2204 # overrides self.bad
2207 for script, hkr, output in res.payload:
2208 test = hkr == constants.HKR_FAIL
2209 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2210 "Script %s failed, output:", script)
2212 output = indent_re.sub(' ', output)
2213 feedback_fn("%s" % output)
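# For reference, res.payload is a list of (script, status, output) tuples;
# a made-up example: [("10check-disks", constants.HKR_SUCCESS, ""),
# ("99custom", constants.HKR_FAIL, "custom check failed")]. Only HKR_FAIL
# entries are reported as errors above; their output is indented and echoed.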
2219 class LUVerifyDisks(NoHooksLU):
2220 """Verifies the cluster disks status.
2226 def ExpandNames(self):
2227 self.needed_locks = {
2228 locking.LEVEL_NODE: locking.ALL_SET,
2229 locking.LEVEL_INSTANCE: locking.ALL_SET,
2231 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2233 def Exec(self, feedback_fn):
2234 """Verify integrity of cluster disks.
2236 @rtype: tuple of three items
2237 @return: a tuple of (dict of node-to-node_error, list of instances
2238 which need activate-disks, dict of instance: (node, volume) for
2242 result = res_nodes, res_instances, res_missing = {}, [], {}
2244 vg_name = self.cfg.GetVGName()
2245 nodes = utils.NiceSort(self.cfg.GetNodeList())
2246 instances = [self.cfg.GetInstanceInfo(name)
2247 for name in self.cfg.GetInstanceList()]
2250 for inst in instances:
2252 if (not inst.admin_up or
2253 inst.disk_template not in constants.DTS_NET_MIRROR):
2255 inst.MapLVsByNode(inst_lvs)
2256 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2257 for node, vol_list in inst_lvs.iteritems():
2258 for vol in vol_list:
2259 nv_dict[(node, vol)] = inst
2264 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2268 node_res = node_lvs[node]
2269 if node_res.offline:
2271 msg = node_res.fail_msg
2273 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2274 res_nodes[node] = msg
2277 lvs = node_res.payload
2278 for lv_name, (_, _, lv_online) in lvs.items():
2279 inst = nv_dict.pop((node, lv_name), None)
2280 if (not lv_online and inst is not None
2281 and inst.name not in res_instances):
2282 res_instances.append(inst.name)
2284 # any leftover items in nv_dict are missing LVs, so let's arrange the data better
2286 for key, inst in nv_dict.iteritems():
2287 if inst.name not in res_missing:
2288 res_missing[inst.name] = []
2289 res_missing[inst.name].append(key)
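# Illustrative shape of the result tuple assembled above (all names and
# values below are made up):
#   res_nodes     -> {"node3.example.com": "rpc failure"}        # node errors
#   res_instances -> ["instance1.example.com"]                   # need activate-disks
#   res_missing   -> {"instance2.example.com": [("node1.example.com", "xenvg/1a2b3c.disk0_data")]}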
2294 class LURepairDiskSizes(NoHooksLU):
2295 """Verifies the cluster disks sizes.
2298 _OP_REQP = [("instances", _TListOf(_TNonEmptyString))]
2301 def ExpandNames(self):
2302 if self.op.instances:
2303 self.wanted_names = []
2304 for name in self.op.instances:
2305 full_name = _ExpandInstanceName(self.cfg, name)
2306 self.wanted_names.append(full_name)
2307 self.needed_locks = {
2308 locking.LEVEL_NODE: [],
2309 locking.LEVEL_INSTANCE: self.wanted_names,
2311 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2313 self.wanted_names = None
2314 self.needed_locks = {
2315 locking.LEVEL_NODE: locking.ALL_SET,
2316 locking.LEVEL_INSTANCE: locking.ALL_SET,
2318 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2320 def DeclareLocks(self, level):
2321 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2322 self._LockInstancesNodes(primary_only=True)
2324 def CheckPrereq(self):
2325 """Check prerequisites.
2327 This only checks the optional instance list against the existing names.
2330 if self.wanted_names is None:
2331 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2333 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2334 in self.wanted_names]
2336 def _EnsureChildSizes(self, disk):
2337 """Ensure children of the disk have the needed disk size.
2339 This is valid mainly for DRBD8 and fixes an issue where the
2340 children have a smaller disk size.
2342 @param disk: an L{ganeti.objects.Disk} object
2345 if disk.dev_type == constants.LD_DRBD8:
2346 assert disk.children, "Empty children for DRBD8?"
2347 fchild = disk.children[0]
2348 mismatch = fchild.size < disk.size
2350 self.LogInfo("Child disk has size %d, parent %d, fixing",
2351 fchild.size, disk.size)
2352 fchild.size = disk.size
2354 # and we recurse on this child only, not on the metadev
2355 return self._EnsureChildSizes(fchild) or mismatch
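  # Hypothetical walk-through: a DRBD8 disk recorded at 10240 MiB whose data
  # child reports 10236 MiB gets the child grown (in the configuration) to
  # 10240, and the method returns True so the caller knows to save the config.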
2359 def Exec(self, feedback_fn):
2360 """Verify the size of cluster disks.
2363 # TODO: check child disks too
2364 # TODO: check differences in size between primary/secondary nodes
2366 for instance in self.wanted_instances:
2367 pnode = instance.primary_node
2368 if pnode not in per_node_disks:
2369 per_node_disks[pnode] = []
2370 for idx, disk in enumerate(instance.disks):
2371 per_node_disks[pnode].append((instance, idx, disk))
2374 for node, dskl in per_node_disks.items():
2375 newl = [v[2].Copy() for v in dskl]
2377 self.cfg.SetDiskID(dsk, node)
2378 result = self.rpc.call_blockdev_getsizes(node, newl)
2380 self.LogWarning("Failure in blockdev_getsizes call to node"
2381 " %s, ignoring", node)
2383 if len(result.data) != len(dskl):
2384 self.LogWarning("Invalid result from node %s, ignoring node results",
2387 for ((instance, idx, disk), size) in zip(dskl, result.data):
2389 self.LogWarning("Disk %d of instance %s did not return size"
2390 " information, ignoring", idx, instance.name)
2392 if not isinstance(size, (int, long)):
2393 self.LogWarning("Disk %d of instance %s did not return valid"
2394 " size information, ignoring", idx, instance.name)
2397 if size != disk.size:
2398 self.LogInfo("Disk %d of instance %s has mismatched size,"
2399 " correcting: recorded %d, actual %d", idx,
2400 instance.name, disk.size, size)
2402 self.cfg.Update(instance, feedback_fn)
2403 changed.append((instance.name, idx, size))
2404 if self._EnsureChildSizes(disk):
2405 self.cfg.Update(instance, feedback_fn)
2406 changed.append((instance.name, idx, disk.size))
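    # The accumulated result is a list of (instance_name, disk_index, new_size)
    # tuples, e.g. [("inst1.example.com", 0, 10240)]; names and sizes here are
    # illustrative only.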
2410 class LURenameCluster(LogicalUnit):
2411 """Rename the cluster.
2414 HPATH = "cluster-rename"
2415 HTYPE = constants.HTYPE_CLUSTER
2416 _OP_REQP = [("name", _TNonEmptyString)]
2418 def BuildHooksEnv(self):
2423 "OP_TARGET": self.cfg.GetClusterName(),
2424 "NEW_NAME": self.op.name,
2426 mn = self.cfg.GetMasterNode()
2427 all_nodes = self.cfg.GetNodeList()
2428 return env, [mn], all_nodes
2430 def CheckPrereq(self):
2431 """Verify that the passed name is a valid one.
2434 hostname = utils.GetHostInfo(self.op.name)
2436 new_name = hostname.name
2437 self.ip = new_ip = hostname.ip
2438 old_name = self.cfg.GetClusterName()
2439 old_ip = self.cfg.GetMasterIP()
2440 if new_name == old_name and new_ip == old_ip:
2441 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2442 " cluster has changed",
2444 if new_ip != old_ip:
2445 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2446 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2447 " reachable on the network. Aborting." %
2448 new_ip, errors.ECODE_NOTUNIQUE)
2450 self.op.name = new_name
2452 def Exec(self, feedback_fn):
2453 """Rename the cluster.
2456 clustername = self.op.name
2457 ip = self.ip
2459 # shutdown the master IP
2460 master = self.cfg.GetMasterNode()
2461 result = self.rpc.call_node_stop_master(master, False)
2462 result.Raise("Could not disable the master role")
2465 cluster = self.cfg.GetClusterInfo()
2466 cluster.cluster_name = clustername
2467 cluster.master_ip = ip
2468 self.cfg.Update(cluster, feedback_fn)
2470 # update the known hosts file
2471 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2472 node_list = self.cfg.GetNodeList()
2474 node_list.remove(master)
2477 result = self.rpc.call_upload_file(node_list,
2478 constants.SSH_KNOWN_HOSTS_FILE)
2479 for to_node, to_result in result.iteritems():
2480 msg = to_result.fail_msg
2482 msg = ("Copy of file %s to node %s failed: %s" %
2483 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2484 self.proc.LogWarning(msg)
2487 result = self.rpc.call_node_start_master(master, False, False)
2488 msg = result.fail_msg
2490 self.LogWarning("Could not re-enable the master role on"
2491 " the master, please restart manually: %s", msg)
2494 def _RecursiveCheckIfLVMBased(disk):
2495 """Check if the given disk or its children are lvm-based.
2497 @type disk: L{objects.Disk}
2498 @param disk: the disk to check
2500 @return: boolean indicating whether a LD_LV dev_type was found or not
2504 for chdisk in disk.children:
2505 if _RecursiveCheckIfLVMBased(chdisk):
2507 return disk.dev_type == constants.LD_LV
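# Illustrative behaviour: a plain LV disk, or a DRBD8 disk backed by LV
# children, makes this return True, while a file-based disk with no LV
# children returns False; this is what CheckPrereq below relies on when
# refusing to disable LVM storage.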
2510 class LUSetClusterParams(LogicalUnit):
2511 """Change the parameters of the cluster.
2514 HPATH = "cluster-modify"
2515 HTYPE = constants.HTYPE_CLUSTER
2517 ("hvparams", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2518 ("os_hvp", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2519 ("osparams", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2520 ("enabled_hypervisors",
2521 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2524 ("candidate_pool_size", None),
2527 ("remove_uids", None),
2534 def CheckArguments(self):
2538 if self.op.candidate_pool_size is not None:
2540 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2541 except (ValueError, TypeError), err:
2542 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2543 str(err), errors.ECODE_INVAL)
2544 if self.op.candidate_pool_size < 1:
2545 raise errors.OpPrereqError("At least one master candidate needed",
2548 _CheckBooleanOpField(self.op, "maintain_node_health")
2550 if self.op.uid_pool:
2551 uidpool.CheckUidPool(self.op.uid_pool)
2553 if self.op.add_uids:
2554 uidpool.CheckUidPool(self.op.add_uids)
2556 if self.op.remove_uids:
2557 uidpool.CheckUidPool(self.op.remove_uids)
2559 def ExpandNames(self):
2560 # FIXME: in the future maybe other cluster params won't require checking on
2561 # all nodes to be modified.
2562 self.needed_locks = {
2563 locking.LEVEL_NODE: locking.ALL_SET,
2565 self.share_locks[locking.LEVEL_NODE] = 1
2567 def BuildHooksEnv(self):
2572 "OP_TARGET": self.cfg.GetClusterName(),
2573 "NEW_VG_NAME": self.op.vg_name,
2575 mn = self.cfg.GetMasterNode()
2576 return env, [mn], [mn]
2578 def CheckPrereq(self):
2579 """Check prerequisites.
2581 This checks whether the given params don't conflict and
2582 if the given volume group is valid.
2585 if self.op.vg_name is not None and not self.op.vg_name:
2586 instances = self.cfg.GetAllInstancesInfo().values()
2587 for inst in instances:
2588 for disk in inst.disks:
2589 if _RecursiveCheckIfLVMBased(disk):
2590 raise errors.OpPrereqError("Cannot disable lvm storage while"
2591 " lvm-based instances exist",
2594 node_list = self.acquired_locks[locking.LEVEL_NODE]
2596 # if vg_name is not None, check the given volume group on all nodes
2598 vglist = self.rpc.call_vg_list(node_list)
2599 for node in node_list:
2600 msg = vglist[node].fail_msg
2602 # ignoring down node
2603 self.LogWarning("Error while gathering data on node %s"
2604 " (ignoring node): %s", node, msg)
2606 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2608 constants.MIN_VG_SIZE)
2610 raise errors.OpPrereqError("Error on node '%s': %s" %
2611 (node, vgstatus), errors.ECODE_ENVIRON)
2613 self.cluster = cluster = self.cfg.GetClusterInfo()
2614 # validate params changes
2615 if self.op.beparams:
2616 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2617 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2619 if self.op.nicparams:
2620 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2621 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2622 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2625 # check all instances for consistency
2626 for instance in self.cfg.GetAllInstancesInfo().values():
2627 for nic_idx, nic in enumerate(instance.nics):
2628 params_copy = copy.deepcopy(nic.nicparams)
2629 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2631 # check parameter syntax
2633 objects.NIC.CheckParameterSyntax(params_filled)
2634 except errors.ConfigurationError, err:
2635 nic_errors.append("Instance %s, nic/%d: %s" %
2636 (instance.name, nic_idx, err))
2638 # if we're moving instances to routed, check that they have an ip
2639 target_mode = params_filled[constants.NIC_MODE]
2640 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2641 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2642 (instance.name, nic_idx))
2644 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2645 "\n".join(nic_errors))
2647 # hypervisor list/parameters
2648 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2649 if self.op.hvparams:
2650 for hv_name, hv_dict in self.op.hvparams.items():
2651 if hv_name not in self.new_hvparams:
2652 self.new_hvparams[hv_name] = hv_dict
2654 self.new_hvparams[hv_name].update(hv_dict)
2656 # os hypervisor parameters
2657 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2659 for os_name, hvs in self.op.os_hvp.items():
2660 if os_name not in self.new_os_hvp:
2661 self.new_os_hvp[os_name] = hvs
2663 for hv_name, hv_dict in hvs.items():
2664 if hv_name not in self.new_os_hvp[os_name]:
2665 self.new_os_hvp[os_name][hv_name] = hv_dict
2667 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2670 self.new_osp = objects.FillDict(cluster.osparams, {})
2671 if self.op.osparams:
2672 for os_name, osp in self.op.osparams.items():
2673 if os_name not in self.new_osp:
2674 self.new_osp[os_name] = {}
2676 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2679 if not self.new_osp[os_name]:
2680 # we removed all parameters
2681 del self.new_osp[os_name]
2683 # check the parameter validity (remote check)
2684 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2685 os_name, self.new_osp[os_name])
2687 # changes to the hypervisor list
2688 if self.op.enabled_hypervisors is not None:
2689 self.hv_list = self.op.enabled_hypervisors
2690 for hv in self.hv_list:
2691 # if the hypervisor doesn't already exist in the cluster
2692 # hvparams, we initialize it to empty, and then (in both
2693 # cases) we make sure to fill the defaults, as we might not
2694 # have a complete defaults list if the hypervisor wasn't enabled before
2696 if hv not in new_hvp:
2698 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2699 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2701 self.hv_list = cluster.enabled_hypervisors
2703 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2704 # either the enabled list has changed, or the parameters have, validate
2705 for hv_name, hv_params in self.new_hvparams.items():
2706 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2707 (self.op.enabled_hypervisors and
2708 hv_name in self.op.enabled_hypervisors)):
2709 # either this is a new hypervisor, or its parameters have changed
2710 hv_class = hypervisor.GetHypervisor(hv_name)
2711 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2712 hv_class.CheckParameterSyntax(hv_params)
2713 _CheckHVParams(self, node_list, hv_name, hv_params)
2716 # no need to check any newly-enabled hypervisors, since the
2717 # defaults have already been checked in the above code-block
2718 for os_name, os_hvp in self.new_os_hvp.items():
2719 for hv_name, hv_params in os_hvp.items():
2720 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2721 # we need to fill in the new os_hvp on top of the actual hv_p
2722 cluster_defaults = self.new_hvparams.get(hv_name, {})
2723 new_osp = objects.FillDict(cluster_defaults, hv_params)
2724 hv_class = hypervisor.GetHypervisor(hv_name)
2725 hv_class.CheckParameterSyntax(new_osp)
2726 _CheckHVParams(self, node_list, hv_name, new_osp)
2729 def Exec(self, feedback_fn):
2730 """Change the parameters of the cluster.
2733 if self.op.vg_name is not None:
2734 new_volume = self.op.vg_name
2737 if new_volume != self.cfg.GetVGName():
2738 self.cfg.SetVGName(new_volume)
2740 feedback_fn("Cluster LVM configuration already in desired"
2741 " state, not changing")
2742 if self.op.hvparams:
2743 self.cluster.hvparams = self.new_hvparams
2745 self.cluster.os_hvp = self.new_os_hvp
2746 if self.op.enabled_hypervisors is not None:
2747 self.cluster.hvparams = self.new_hvparams
2748 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2749 if self.op.beparams:
2750 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2751 if self.op.nicparams:
2752 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2753 if self.op.osparams:
2754 self.cluster.osparams = self.new_osp
2756 if self.op.candidate_pool_size is not None:
2757 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2758 # we need to update the pool size here, otherwise the save will fail
2759 _AdjustCandidatePool(self, [])
2761 if self.op.maintain_node_health is not None:
2762 self.cluster.maintain_node_health = self.op.maintain_node_health
2764 if self.op.add_uids is not None:
2765 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2767 if self.op.remove_uids is not None:
2768 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2770 if self.op.uid_pool is not None:
2771 self.cluster.uid_pool = self.op.uid_pool
2773 self.cfg.Update(self.cluster, feedback_fn)
2776 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2777 """Distribute additional files which are part of the cluster configuration.
2779 ConfigWriter takes care of distributing the config and ssconf files, but
2780 there are more files which should be distributed to all nodes. This function
2781 makes sure those are copied.
2783 @param lu: calling logical unit
2784 @param additional_nodes: list of nodes not in the config to distribute to
2787 # 1. Gather target nodes
2788 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2789 dist_nodes = lu.cfg.GetOnlineNodeList()
2790 if additional_nodes is not None:
2791 dist_nodes.extend(additional_nodes)
2792 if myself.name in dist_nodes:
2793 dist_nodes.remove(myself.name)
2795 # 2. Gather files to distribute
2796 dist_files = set([constants.ETC_HOSTS,
2797 constants.SSH_KNOWN_HOSTS_FILE,
2798 constants.RAPI_CERT_FILE,
2799 constants.RAPI_USERS_FILE,
2800 constants.CONFD_HMAC_KEY,
2801 constants.CLUSTER_DOMAIN_SECRET_FILE,
2804 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2805 for hv_name in enabled_hypervisors:
2806 hv_class = hypervisor.GetHypervisor(hv_name)
2807 dist_files.update(hv_class.GetAncillaryFiles())
2809 # 3. Perform the files upload
2810 for fname in dist_files:
2811 if os.path.exists(fname):
2812 result = lu.rpc.call_upload_file(dist_nodes, fname)
2813 for to_node, to_result in result.items():
2814 msg = to_result.fail_msg
2816 msg = ("Copy of file %s to node %s failed: %s" %
2817 (fname, to_node, msg))
2818 lu.proc.LogWarning(msg)
2821 class LURedistributeConfig(NoHooksLU):
2822 """Force the redistribution of cluster configuration.
2824 This is a very simple LU.
2830 def ExpandNames(self):
2831 self.needed_locks = {
2832 locking.LEVEL_NODE: locking.ALL_SET,
2834 self.share_locks[locking.LEVEL_NODE] = 1
2836 def Exec(self, feedback_fn):
2837 """Redistribute the configuration.
2840 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2841 _RedistributeAncillaryFiles(self)
2844 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2845 """Sleep and poll for an instance's disk to sync.
2848 if not instance.disks or disks is not None and not disks:
2851 disks = _ExpandCheckDisks(instance, disks)
2854 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2856 node = instance.primary_node
2859 lu.cfg.SetDiskID(dev, node)
2861 # TODO: Convert to utils.Retry
2864 degr_retries = 10 # in seconds, as we sleep 1 second each time
2868 cumul_degraded = False
2869 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2870 msg = rstats.fail_msg
2872 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2875 raise errors.RemoteError("Can't contact node %s for mirror data,"
2876 " aborting." % node)
2879 rstats = rstats.payload
2881 for i, mstat in enumerate(rstats):
2883 lu.LogWarning("Can't compute data for node %s/%s",
2884 node, disks[i].iv_name)
2887 cumul_degraded = (cumul_degraded or
2888 (mstat.is_degraded and mstat.sync_percent is None))
2889 if mstat.sync_percent is not None:
2891 if mstat.estimated_time is not None:
2892 rem_time = ("%s remaining (estimated)" %
2893 utils.FormatSeconds(mstat.estimated_time))
2894 max_time = mstat.estimated_time
2896 rem_time = "no time estimate"
2897 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2898 (disks[i].iv_name, mstat.sync_percent, rem_time))
2900 # if we're done but degraded, let's do a few small retries, to
2901 # make sure we see a stable and not transient situation; therefore
2902 # we force restart of the loop
2903 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2904 logging.info("Degraded disks found, %d retries left", degr_retries)
2912 time.sleep(min(60, max_time))
2915 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2916 return not cumul_degraded
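# A hedged usage sketch (not taken verbatim from this module): an LU that
# just created mirrored disks could wait for them with
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks for %s failed to sync" % instance.name)
# i.e. a False return means at least one mirror stayed degraded.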
2919 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2920 """Check that mirrors are not degraded.
2922 The ldisk parameter, if True, will change the test from the
2923 is_degraded attribute (which represents overall non-ok status for
2924 the device(s)) to the ldisk (representing the local storage status).
2927 lu.cfg.SetDiskID(dev, node)
2931 if on_primary or dev.AssembleOnSecondary():
2932 rstats = lu.rpc.call_blockdev_find(node, dev)
2933 msg = rstats.fail_msg
2935 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2937 elif not rstats.payload:
2938 lu.LogWarning("Can't find disk on node %s", node)
2942 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2944 result = result and not rstats.payload.is_degraded
2947 for child in dev.children:
2948 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2953 class LUDiagnoseOS(NoHooksLU):
2954 """Logical unit for OS diagnose/query.
2958 ("output_fields", _TListOf(_TNonEmptyString)),
2959 ("names", _TListOf(_TNonEmptyString)),
2962 _FIELDS_STATIC = utils.FieldSet()
2963 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
2964 "parameters", "api_versions")
2966 def CheckArguments(self):
2968 raise errors.OpPrereqError("Selective OS query not supported",
2971 _CheckOutputFields(static=self._FIELDS_STATIC,
2972 dynamic=self._FIELDS_DYNAMIC,
2973 selected=self.op.output_fields)
2975 def ExpandNames(self):
2976 # Lock all nodes, in shared mode
2977 # Temporary removal of locks, should be reverted later
2978 # TODO: reintroduce locks when they are lighter-weight
2979 self.needed_locks = {}
2980 #self.share_locks[locking.LEVEL_NODE] = 1
2981 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2984 def _DiagnoseByOS(rlist):
2985 """Remaps a per-node return list into an a per-os per-node dictionary
2987 @param rlist: a map with node names as keys and OS objects as values
2990 @return: a dictionary with osnames as keys and as value another
2991 map, with nodes as keys and tuples of (path, status, diagnose,
2992 variants, parameters, api_versions) as values, eg::
2994 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
2995 (/srv/..., False, "invalid api")],
2996 "node2": [(/srv/..., True, "", [], [])]}
3001 # we build here the list of nodes that didn't fail the RPC (at RPC
3002 # level), so that nodes with a non-responding node daemon don't
3003 # make all OSes invalid
3004 good_nodes = [node_name for node_name in rlist
3005 if not rlist[node_name].fail_msg]
3006 for node_name, nr in rlist.items():
3007 if nr.fail_msg or not nr.payload:
3009 for (name, path, status, diagnose, variants,
3010 params, api_versions) in nr.payload:
3011 if name not in all_os:
3012 # build a list of nodes for this os containing empty lists
3013 # for each node in node_list
3015 for nname in good_nodes:
3016 all_os[name][nname] = []
3017 # convert params from [name, help] to (name, help)
3018 params = [tuple(v) for v in params]
3019 all_os[name][node_name].append((path, status, diagnose,
3020 variants, params, api_versions))
3023 def Exec(self, feedback_fn):
3024 """Compute the list of OSes.
3027 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3028 node_data = self.rpc.call_os_diagnose(valid_nodes)
3029 pol = self._DiagnoseByOS(node_data)
3032 for os_name, os_data in pol.items():
3035 (variants, params, api_versions) = null_state = (set(), set(), set())
3036 for idx, osl in enumerate(os_data.values()):
3037 valid = bool(valid and osl and osl[0][1])
3039 (variants, params, api_versions) = null_state
3041 node_variants, node_params, node_api = osl[0][3:6]
3042 if idx == 0: # first entry
3043 variants = set(node_variants)
3044 params = set(node_params)
3045 api_versions = set(node_api)
3046 else: # keep consistency
3047 variants.intersection_update(node_variants)
3048 params.intersection_update(node_params)
3049 api_versions.intersection_update(node_api)
3051 for field in self.op.output_fields:
3054 elif field == "valid":
3056 elif field == "node_status":
3057 # this is just a copy of the dict
3059 for node_name, nos_list in os_data.items():
3060 val[node_name] = nos_list
3061 elif field == "variants":
3062 val = list(variants)
3063 elif field == "parameters":
3065 elif field == "api_versions":
3066 val = list(api_versions)
3068 raise errors.ParameterError(field)
3075 class LURemoveNode(LogicalUnit):
3076 """Logical unit for removing a node.
3079 HPATH = "node-remove"
3080 HTYPE = constants.HTYPE_NODE
3081 _OP_REQP = [("node_name", _TNonEmptyString)]
3083 def BuildHooksEnv(self):
3086 This doesn't run on the target node in the pre phase as a failed
3087 node would then be impossible to remove.
3091 "OP_TARGET": self.op.node_name,
3092 "NODE_NAME": self.op.node_name,
3094 all_nodes = self.cfg.GetNodeList()
3096 all_nodes.remove(self.op.node_name)
3098 logging.warning("Node %s which is about to be removed not found"
3099 " in the all nodes list", self.op.node_name)
3100 return env, all_nodes, all_nodes
3102 def CheckPrereq(self):
3103 """Check prerequisites.
3106 - the node exists in the configuration
3107 - it does not have primary or secondary instances
3108 - it's not the master
3110 Any errors are signaled by raising errors.OpPrereqError.
3113 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3114 node = self.cfg.GetNodeInfo(self.op.node_name)
3115 assert node is not None
3117 instance_list = self.cfg.GetInstanceList()
3119 masternode = self.cfg.GetMasterNode()
3120 if node.name == masternode:
3121 raise errors.OpPrereqError("Node is the master node,"
3122 " you need to failover first.",
3125 for instance_name in instance_list:
3126 instance = self.cfg.GetInstanceInfo(instance_name)
3127 if node.name in instance.all_nodes:
3128 raise errors.OpPrereqError("Instance %s is still running on the node,"
3129 " please remove first." % instance_name,
3131 self.op.node_name = node.name
3134 def Exec(self, feedback_fn):
3135 """Removes the node from the cluster.
3139 logging.info("Stopping the node daemon and removing configs from node %s",
3142 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3144 # Promote nodes to master candidate as needed
3145 _AdjustCandidatePool(self, exceptions=[node.name])
3146 self.context.RemoveNode(node.name)
3148 # Run post hooks on the node before it's removed
3149 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3151 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3153 # pylint: disable-msg=W0702
3154 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3156 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3157 msg = result.fail_msg
3159 self.LogWarning("Errors encountered on the remote node while leaving"
3160 " the cluster: %s", msg)
3162 # Remove node from our /etc/hosts
3163 if self.cfg.GetClusterInfo().modify_etc_hosts:
3164 # FIXME: this should be done via an rpc call to node daemon
3165 utils.RemoveHostFromEtcHosts(node.name)
3166 _RedistributeAncillaryFiles(self)
3169 class LUQueryNodes(NoHooksLU):
3170 """Logical unit for querying nodes.
3173 # pylint: disable-msg=W0142
3175 ("output_fields", _TListOf(_TNonEmptyString)),
3176 ("names", _TListOf(_TNonEmptyString)),
3177 ("use_locking", _TBool),
3181 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3182 "master_candidate", "offline", "drained"]
3184 _FIELDS_DYNAMIC = utils.FieldSet(
3186 "mtotal", "mnode", "mfree",
3188 "ctotal", "cnodes", "csockets",
3191 _FIELDS_STATIC = utils.FieldSet(*[
3192 "pinst_cnt", "sinst_cnt",
3193 "pinst_list", "sinst_list",
3194 "pip", "sip", "tags",
3196 "role"] + _SIMPLE_FIELDS
3199 def CheckArguments(self):
3200 _CheckOutputFields(static=self._FIELDS_STATIC,
3201 dynamic=self._FIELDS_DYNAMIC,
3202 selected=self.op.output_fields)
3204 def ExpandNames(self):
3205 self.needed_locks = {}
3206 self.share_locks[locking.LEVEL_NODE] = 1
3209 self.wanted = _GetWantedNodes(self, self.op.names)
3211 self.wanted = locking.ALL_SET
3213 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3214 self.do_locking = self.do_node_query and self.op.use_locking
3216 # if we don't request only static fields, we need to lock the nodes
3217 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3219 def Exec(self, feedback_fn):
3220 """Computes the list of nodes and their attributes.
3223 all_info = self.cfg.GetAllNodesInfo()
3225 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3226 elif self.wanted != locking.ALL_SET:
3227 nodenames = self.wanted
3228 missing = set(nodenames).difference(all_info.keys())
3230 raise errors.OpExecError(
3231 "Some nodes were removed before retrieving their data: %s" % missing)
3233 nodenames = all_info.keys()
3235 nodenames = utils.NiceSort(nodenames)
3236 nodelist = [all_info[name] for name in nodenames]
3238 # begin data gathering
3240 if self.do_node_query:
3242 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3243 self.cfg.GetHypervisorType())
3244 for name in nodenames:
3245 nodeinfo = node_data[name]
3246 if not nodeinfo.fail_msg and nodeinfo.payload:
3247 nodeinfo = nodeinfo.payload
3248 fn = utils.TryConvert
3250 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3251 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3252 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3253 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3254 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3255 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3256 "bootid": nodeinfo.get('bootid', None),
3257 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3258 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3261 live_data[name] = {}
3263 live_data = dict.fromkeys(nodenames, {})
3265 node_to_primary = dict([(name, set()) for name in nodenames])
3266 node_to_secondary = dict([(name, set()) for name in nodenames])
3268 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3269 "sinst_cnt", "sinst_list"))
3270 if inst_fields & frozenset(self.op.output_fields):
3271 inst_data = self.cfg.GetAllInstancesInfo()
3273 for inst in inst_data.values():
3274 if inst.primary_node in node_to_primary:
3275 node_to_primary[inst.primary_node].add(inst.name)
3276 for secnode in inst.secondary_nodes:
3277 if secnode in node_to_secondary:
3278 node_to_secondary[secnode].add(inst.name)
3280 master_node = self.cfg.GetMasterNode()
3282 # end data gathering
3285 for node in nodelist:
3287 for field in self.op.output_fields:
3288 if field in self._SIMPLE_FIELDS:
3289 val = getattr(node, field)
3290 elif field == "pinst_list":
3291 val = list(node_to_primary[node.name])
3292 elif field == "sinst_list":
3293 val = list(node_to_secondary[node.name])
3294 elif field == "pinst_cnt":
3295 val = len(node_to_primary[node.name])
3296 elif field == "sinst_cnt":
3297 val = len(node_to_secondary[node.name])
3298 elif field == "pip":
3299 val = node.primary_ip
3300 elif field == "sip":
3301 val = node.secondary_ip
3302 elif field == "tags":
3303 val = list(node.GetTags())
3304 elif field == "master":
3305 val = node.name == master_node
3306 elif self._FIELDS_DYNAMIC.Matches(field):
3307 val = live_data[node.name].get(field, None)
3308 elif field == "role":
3309 if node.name == master_node:
3311 elif node.master_candidate:
3320 raise errors.ParameterError(field)
3321 node_output.append(val)
3322 output.append(node_output)
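# Example of the rows collected above (node names and values are made up):
# with output_fields == ["name", "pinst_cnt", "pip"] a row could be
# ["node1.example.com", 2, "192.0.2.10"], one list per queried node.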
3327 class LUQueryNodeVolumes(NoHooksLU):
3328 """Logical unit for getting volumes on node(s).
3332 ("nodes", _TListOf(_TNonEmptyString)),
3333 ("output_fields", _TListOf(_TNonEmptyString)),
3336 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3337 _FIELDS_STATIC = utils.FieldSet("node")
3339 def CheckArguments(self):
3340 _CheckOutputFields(static=self._FIELDS_STATIC,
3341 dynamic=self._FIELDS_DYNAMIC,
3342 selected=self.op.output_fields)
3344 def ExpandNames(self):
3345 self.needed_locks = {}
3346 self.share_locks[locking.LEVEL_NODE] = 1
3347 if not self.op.nodes:
3348 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3350 self.needed_locks[locking.LEVEL_NODE] = \
3351 _GetWantedNodes(self, self.op.nodes)
3353 def Exec(self, feedback_fn):
3354 """Computes the list of nodes and their attributes.
3357 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3358 volumes = self.rpc.call_node_volumes(nodenames)
3360 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3361 in self.cfg.GetInstanceList()]
3363 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3366 for node in nodenames:
3367 nresult = volumes[node]
3370 msg = nresult.fail_msg
3372 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3375 node_vols = nresult.payload[:]
3376 node_vols.sort(key=lambda vol: vol['dev'])
3378 for vol in node_vols:
3380 for field in self.op.output_fields:
3383 elif field == "phys":
3387 elif field == "name":
3389 elif field == "size":
3390 val = int(float(vol['size']))
3391 elif field == "instance":
3393 if node not in lv_by_node[inst]:
3395 if vol['name'] in lv_by_node[inst][node]:
3401 raise errors.ParameterError(field)
3402 node_output.append(str(val))
3404 output.append(node_output)
3409 class LUQueryNodeStorage(NoHooksLU):
3410 """Logical unit for getting information on storage units on node(s).
3413 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3415 ("nodes", _TListOf(_TNonEmptyString)),
3416 ("storage_type", _CheckStorageType),
3417 ("output_fields", _TListOf(_TNonEmptyString)),
3419 _OP_DEFS = [("name", None)]
3422 def CheckArguments(self):
3423 _CheckOutputFields(static=self._FIELDS_STATIC,
3424 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3425 selected=self.op.output_fields)
3427 def ExpandNames(self):
3428 self.needed_locks = {}
3429 self.share_locks[locking.LEVEL_NODE] = 1
3432 self.needed_locks[locking.LEVEL_NODE] = \
3433 _GetWantedNodes(self, self.op.nodes)
3435 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3437 def Exec(self, feedback_fn):
3438 """Computes the list of nodes and their attributes.
3441 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3443 # Always get name to sort by
3444 if constants.SF_NAME in self.op.output_fields:
3445 fields = self.op.output_fields[:]
3447 fields = [constants.SF_NAME] + self.op.output_fields
3449 # Never ask for node or type as it's only known to the LU
3450 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3451 while extra in fields:
3452 fields.remove(extra)
3454 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3455 name_idx = field_idx[constants.SF_NAME]
3457 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3458 data = self.rpc.call_storage_list(self.nodes,
3459 self.op.storage_type, st_args,
3460 self.op.name, fields)
3464 for node in utils.NiceSort(self.nodes):
3465 nresult = data[node]
3469 msg = nresult.fail_msg
3471 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3474 rows = dict([(row[name_idx], row) for row in nresult.payload])
3476 for name in utils.NiceSort(rows.keys()):
3481 for field in self.op.output_fields:
3482 if field == constants.SF_NODE:
3484 elif field == constants.SF_TYPE:
3485 val = self.op.storage_type
3486 elif field in field_idx:
3487 val = row[field_idx[field]]
3489 raise errors.ParameterError(field)
3498 class LUModifyNodeStorage(NoHooksLU):
3499 """Logical unit for modifying a storage volume on a node.
3503 ("node_name", _TNonEmptyString),
3504 ("storage_type", _CheckStorageType),
3505 ("name", _TNonEmptyString),
3506 ("changes", _TDict),
3510 def CheckArguments(self):
3511 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3513 storage_type = self.op.storage_type
3516 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3518 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3519 " modified" % storage_type,
3522 diff = set(self.op.changes.keys()) - modifiable
3524 raise errors.OpPrereqError("The following fields can not be modified for"
3525 " storage units of type '%s': %r" %
3526 (storage_type, list(diff)),
3529 def ExpandNames(self):
3530 self.needed_locks = {
3531 locking.LEVEL_NODE: self.op.node_name,
3534 def Exec(self, feedback_fn):
3535 """Computes the list of nodes and their attributes.
3538 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3539 result = self.rpc.call_storage_modify(self.op.node_name,
3540 self.op.storage_type, st_args,
3541 self.op.name, self.op.changes)
3542 result.Raise("Failed to modify storage unit '%s' on %s" %
3543 (self.op.name, self.op.node_name))
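    # Hedged example of an invocation this LU could serve: marking an LVM
    # physical volume as non-allocatable might pass something like
    # storage_type=constants.ST_LVM_PV, name="/dev/sdb1" and
    # changes={constants.SF_ALLOCATABLE: False}; the exact names are only a
    # sketch, and the allowed fields come from MODIFIABLE_STORAGE_FIELDS.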
3546 class LUAddNode(LogicalUnit):
3547 """Logical unit for adding node to the cluster.
3551 HTYPE = constants.HTYPE_NODE
3553 ("node_name", _TNonEmptyString),
3555 _OP_DEFS = [("secondary_ip", None)]
3557 def CheckArguments(self):
3558 # validate/normalize the node name
3559 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3561 def BuildHooksEnv(self):
3564 This will run on all nodes before, and on all nodes + the new node after.
3568 "OP_TARGET": self.op.node_name,
3569 "NODE_NAME": self.op.node_name,
3570 "NODE_PIP": self.op.primary_ip,
3571 "NODE_SIP": self.op.secondary_ip,
3573 nodes_0 = self.cfg.GetNodeList()
3574 nodes_1 = nodes_0 + [self.op.node_name, ]
3575 return env, nodes_0, nodes_1
3577 def CheckPrereq(self):
3578 """Check prerequisites.
3581 - the new node is not already in the config
3583 - its parameters (single/dual homed) matches the cluster
3585 Any errors are signaled by raising errors.OpPrereqError.
3588 node_name = self.op.node_name
3591 dns_data = utils.GetHostInfo(node_name)
3593 node = dns_data.name
3594 primary_ip = self.op.primary_ip = dns_data.ip
3595 if self.op.secondary_ip is None:
3596 self.op.secondary_ip = primary_ip
3597 if not utils.IsValidIP4(self.op.secondary_ip):
3598 raise errors.OpPrereqError("Invalid secondary IP given",
3600 secondary_ip = self.op.secondary_ip
3602 node_list = cfg.GetNodeList()
3603 if not self.op.readd and node in node_list:
3604 raise errors.OpPrereqError("Node %s is already in the configuration" %
3605 node, errors.ECODE_EXISTS)
3606 elif self.op.readd and node not in node_list:
3607 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3610 self.changed_primary_ip = False
3612 for existing_node_name in node_list:
3613 existing_node = cfg.GetNodeInfo(existing_node_name)
3615 if self.op.readd and node == existing_node_name:
3616 if existing_node.secondary_ip != secondary_ip:
3617 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3618 " address configuration as before",
3620 if existing_node.primary_ip != primary_ip:
3621 self.changed_primary_ip = True
3625 if (existing_node.primary_ip == primary_ip or
3626 existing_node.secondary_ip == primary_ip or
3627 existing_node.primary_ip == secondary_ip or
3628 existing_node.secondary_ip == secondary_ip):
3629 raise errors.OpPrereqError("New node ip address(es) conflict with"
3630 " existing node %s" % existing_node.name,
3631 errors.ECODE_NOTUNIQUE)
3633 # check that the type of the node (single versus dual homed) is the
3634 # same as for the master
3635 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3636 master_singlehomed = myself.secondary_ip == myself.primary_ip
3637 newbie_singlehomed = secondary_ip == primary_ip
3638 if master_singlehomed != newbie_singlehomed:
3639 if master_singlehomed:
3640 raise errors.OpPrereqError("The master has no private ip but the"
3641 " new node has one",
3644 raise errors.OpPrereqError("The master has a private ip but the"
3645 " new node doesn't have one",
3648 # checks reachability
3649 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3650 raise errors.OpPrereqError("Node not reachable by ping",
3651 errors.ECODE_ENVIRON)
3653 if not newbie_singlehomed:
3654 # check reachability from my secondary ip to newbie's secondary ip
3655 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3656 source=myself.secondary_ip):
3657 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3658 " based ping to noded port",
3659 errors.ECODE_ENVIRON)
3666 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3669 self.new_node = self.cfg.GetNodeInfo(node)
3670 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3672 self.new_node = objects.Node(name=node,
3673 primary_ip=primary_ip,
3674 secondary_ip=secondary_ip,
3675 master_candidate=self.master_candidate,
3676 offline=False, drained=False)
3678 def Exec(self, feedback_fn):
3679 """Adds the new node to the cluster.
3682 new_node = self.new_node
3683 node = new_node.name
3685 # for re-adds, reset the offline/drained/master-candidate flags;
3686 # we need to reset here, otherwise offline would prevent RPC calls
3687 # later in the procedure; this also means that if the re-add
3688 # fails, we are left with a non-offlined, broken node
3690 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3691 self.LogInfo("Readding a node, the offline/drained flags were reset")
3692 # if we demote the node, we do cleanup later in the procedure
3693 new_node.master_candidate = self.master_candidate
3694 if self.changed_primary_ip:
3695 new_node.primary_ip = self.op.primary_ip
3697 # notify the user about any possible mc promotion
3698 if new_node.master_candidate:
3699 self.LogInfo("Node will be a master candidate")
3701 # check connectivity
3702 result = self.rpc.call_version([node])[node]
3703 result.Raise("Can't get version information from node %s" % node)
3704 if constants.PROTOCOL_VERSION == result.payload:
3705 logging.info("Communication to node %s fine, sw version %s match",
3706 node, result.payload)
3708 raise errors.OpExecError("Version mismatch master version %s,"
3709 " node version %s" %
3710 (constants.PROTOCOL_VERSION, result.payload))
3713 if self.cfg.GetClusterInfo().modify_ssh_setup:
3714 logging.info("Copy ssh key to node %s", node)
3715 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3717 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3718 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3722 keyarray.append(utils.ReadFile(i))
3724 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3725 keyarray[2], keyarray[3], keyarray[4],
3727 result.Raise("Cannot transfer ssh keys to the new node")
3729 # Add node to our /etc/hosts, and add key to known_hosts
3730 if self.cfg.GetClusterInfo().modify_etc_hosts:
3731 # FIXME: this should be done via an rpc call to node daemon
3732 utils.AddHostToEtcHosts(new_node.name)
3734 if new_node.secondary_ip != new_node.primary_ip:
3735 result = self.rpc.call_node_has_ip_address(new_node.name,
3736 new_node.secondary_ip)
3737 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3738 prereq=True, ecode=errors.ECODE_ENVIRON)
3739 if not result.payload:
3740 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3741 " you gave (%s). Please fix and re-run this"
3742 " command." % new_node.secondary_ip)
3744 node_verify_list = [self.cfg.GetMasterNode()]
3745 node_verify_param = {
3746 constants.NV_NODELIST: [node],
3747 # TODO: do a node-net-test as well?
3750 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3751 self.cfg.GetClusterName())
3752 for verifier in node_verify_list:
3753 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3754 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3756 for failed in nl_payload:
3757 feedback_fn("ssh/hostname verification failed"
3758 " (checking from %s): %s" %
3759 (verifier, nl_payload[failed]))
3760 raise errors.OpExecError("ssh/hostname verification failed.")
3763 _RedistributeAncillaryFiles(self)
3764 self.context.ReaddNode(new_node)
3765 # make sure we redistribute the config
3766 self.cfg.Update(new_node, feedback_fn)
3767 # and make sure the new node will not have old files around
3768 if not new_node.master_candidate:
3769 result = self.rpc.call_node_demote_from_mc(new_node.name)
3770 msg = result.fail_msg
3772 self.LogWarning("Node failed to demote itself from master"
3773 " candidate status: %s" % msg)
3775 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3776 self.context.AddNode(new_node, self.proc.GetECId())
3779 class LUSetNodeParams(LogicalUnit):
3780 """Modifies the parameters of a node.
3783 HPATH = "node-modify"
3784 HTYPE = constants.HTYPE_NODE
3785 _OP_REQP = [("node_name", _TNonEmptyString)]
3788 def CheckArguments(self):
3789 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3790 _CheckBooleanOpField(self.op, 'master_candidate')
3791 _CheckBooleanOpField(self.op, 'offline')
3792 _CheckBooleanOpField(self.op, 'drained')
3793 _CheckBooleanOpField(self.op, 'auto_promote')
3794 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3795 if all_mods.count(None) == 3:
3796 raise errors.OpPrereqError("Please pass at least one modification",
3798 if all_mods.count(True) > 1:
3799 raise errors.OpPrereqError("Can't set the node into more than one"
3800 " state at the same time",
3803 # Boolean value that tells us whether we're offlining or draining the node
3804 self.offline_or_drain = (self.op.offline == True or
3805 self.op.drained == True)
3806 self.deoffline_or_drain = (self.op.offline == False or
3807 self.op.drained == False)
3808 self.might_demote = (self.op.master_candidate == False or
3809 self.offline_or_drain)
3811 self.lock_all = self.op.auto_promote and self.might_demote
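    # Worked example of the flag logic above (a hypothetical call): passing
    # offline=True with master_candidate and drained left unset gives
    # offline_or_drain=True and might_demote=True, so lock_all becomes True
    # only if auto_promote was also requested.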
3814 def ExpandNames(self):
3816 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3818 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3820 def BuildHooksEnv(self):
3823 This runs on the master node.
3827 "OP_TARGET": self.op.node_name,
3828 "MASTER_CANDIDATE": str(self.op.master_candidate),
3829 "OFFLINE": str(self.op.offline),
3830 "DRAINED": str(self.op.drained),
3832 nl = [self.cfg.GetMasterNode(),
3836 def CheckPrereq(self):
3837 """Check prerequisites.
3839 This only checks the instance list against the existing names.
3842 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3844 if (self.op.master_candidate is not None or
3845 self.op.drained is not None or
3846 self.op.offline is not None):
3847 # we can't change the master's node flags
3848 if self.op.node_name == self.cfg.GetMasterNode():
3849 raise errors.OpPrereqError("The master role can be changed"
3850 " only via masterfailover",
3854 if node.master_candidate and self.might_demote and not self.lock_all:
3855 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3856 # check if, after removing the current node, we're missing master candidates
3858 (mc_remaining, mc_should, _) = \
3859 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3860 if mc_remaining < mc_should:
3861 raise errors.OpPrereqError("Not enough master candidates, please"
3862 " pass auto_promote to allow promotion",
3865 if (self.op.master_candidate == True and
3866 ((node.offline and not self.op.offline == False) or
3867 (node.drained and not self.op.drained == False))):
3868 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3869 " to master_candidate" % node.name,
3872 # If we're being deofflined/drained, we'll MC ourself if needed
3873 if (self.deoffline_or_drain and not self.offline_or_drain and not
3874 self.op.master_candidate == True and not node.master_candidate):
3875 self.op.master_candidate = _DecideSelfPromotion(self)
3876 if self.op.master_candidate:
3877 self.LogInfo("Autopromoting node to master candidate")
3881 def Exec(self, feedback_fn):
3890 if self.op.offline is not None:
3891 node.offline = self.op.offline
3892 result.append(("offline", str(self.op.offline)))
3893 if self.op.offline == True:
3894 if node.master_candidate:
3895 node.master_candidate = False
3897 result.append(("master_candidate", "auto-demotion due to offline"))
3899 node.drained = False
3900 result.append(("drained", "clear drained status due to offline"))
3902 if self.op.master_candidate is not None:
3903 node.master_candidate = self.op.master_candidate
3905 result.append(("master_candidate", str(self.op.master_candidate)))
3906 if self.op.master_candidate == False:
3907 rrc = self.rpc.call_node_demote_from_mc(node.name)
3910 self.LogWarning("Node failed to demote itself: %s" % msg)
3912 if self.op.drained is not None:
3913 node.drained = self.op.drained
3914 result.append(("drained", str(self.op.drained)))
3915 if self.op.drained == True:
3916 if node.master_candidate:
3917 node.master_candidate = False
3919 result.append(("master_candidate", "auto-demotion due to drain"))
3920 rrc = self.rpc.call_node_demote_from_mc(node.name)
3923 self.LogWarning("Node failed to demote itself: %s" % msg)
3925 node.offline = False
3926 result.append(("offline", "clear offline status due to drain"))
3928 # we locked all nodes, so we adjust the candidate pool before updating this node
3930 _AdjustCandidatePool(self, [node.name])
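# (the adjustment may auto-promote other nodes to make up for the
# demotion performed on this one)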
3932 # this will trigger configuration file update, if needed
3933 self.cfg.Update(node, feedback_fn)
3935 # this will trigger job queue propagation or cleanup
3937 self.context.ReaddNode(node)
3942 class LUPowercycleNode(NoHooksLU):
3943 """Powercycles a node.
3947 ("node_name", _TNonEmptyString),
3952 def CheckArguments(self):
3953 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3954 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3955 raise errors.OpPrereqError("The node is the master and the force"
3956 " parameter was not set",
3959 def ExpandNames(self):
3960 """Locking for PowercycleNode.
3962 This is a last-resort option and shouldn't block on other
3963 jobs. Therefore, we grab no locks.
3966 self.needed_locks = {}
3968 def Exec(self, feedback_fn):
3972 result = self.rpc.call_node_powercycle(self.op.node_name,
3973 self.cfg.GetHypervisorType())
3974 result.Raise("Failed to schedule the reboot")
3975 return result.payload
3978 class LUQueryClusterInfo(NoHooksLU):
3979 """Query cluster configuration.
3985 def ExpandNames(self):
3986 self.needed_locks = {}
3988 def Exec(self, feedback_fn):
3989 """Return cluster config.
3992 cluster = self.cfg.GetClusterInfo()
3995 # Filter just for enabled hypervisors
3996 for os_name, hv_dict in cluster.os_hvp.items():
3997 os_hvp[os_name] = {}
3998 for hv_name, hv_params in hv_dict.items():
3999 if hv_name in cluster.enabled_hypervisors:
4000 os_hvp[os_name][hv_name] = hv_params
4003 "software_version": constants.RELEASE_VERSION,
4004 "protocol_version": constants.PROTOCOL_VERSION,
4005 "config_version": constants.CONFIG_VERSION,
4006 "os_api_version": max(constants.OS_API_VERSIONS),
4007 "export_version": constants.EXPORT_VERSION,
4008 "architecture": (platform.architecture()[0], platform.machine()),
4009 "name": cluster.cluster_name,
4010 "master": cluster.master_node,
4011 "default_hypervisor": cluster.enabled_hypervisors[0],
4012 "enabled_hypervisors": cluster.enabled_hypervisors,
4013 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4014 for hypervisor_name in cluster.enabled_hypervisors]),
4016 "beparams": cluster.beparams,
4017 "osparams": cluster.osparams,
4018 "nicparams": cluster.nicparams,
4019 "candidate_pool_size": cluster.candidate_pool_size,
4020 "master_netdev": cluster.master_netdev,
4021 "volume_group_name": cluster.volume_group_name,
4022 "file_storage_dir": cluster.file_storage_dir,
4023 "maintain_node_health": cluster.maintain_node_health,
4024 "ctime": cluster.ctime,
4025 "mtime": cluster.mtime,
4026 "uuid": cluster.uuid,
4027 "tags": list(cluster.GetTags()),
4028 "uid_pool": cluster.uid_pool,
4034 class LUQueryConfigValues(NoHooksLU):
4035 """Return configuration values.
4040 _FIELDS_DYNAMIC = utils.FieldSet()
4041 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4044 def CheckArguments(self):
4045 _CheckOutputFields(static=self._FIELDS_STATIC,
4046 dynamic=self._FIELDS_DYNAMIC,
4047 selected=self.op.output_fields)
4049 def ExpandNames(self):
4050 self.needed_locks = {}
4052 def Exec(self, feedback_fn):
4053 """Dump a representation of the cluster config to the standard output.
4057 for field in self.op.output_fields:
4058 if field == "cluster_name":
4059 entry = self.cfg.GetClusterName()
4060 elif field == "master_node":
4061 entry = self.cfg.GetMasterNode()
4062 elif field == "drain_flag":
4063 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
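# (the drain flag is simply the presence of a marker file on the master)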
4064 elif field == "watcher_pause":
4065 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4067 raise errors.ParameterError(field)
4068 values.append(entry)
4072 class LUActivateInstanceDisks(NoHooksLU):
4073 """Bring up an instance's disks.
4076 _OP_REQP = [("instance_name", _TNonEmptyString)]
4077 _OP_DEFS = [("ignore_size", False)]
4080 def ExpandNames(self):
4081 self._ExpandAndLockInstance()
4082 self.needed_locks[locking.LEVEL_NODE] = []
4083 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4085 def DeclareLocks(self, level):
4086 if level == locking.LEVEL_NODE:
4087 self._LockInstancesNodes()
4089 def CheckPrereq(self):
4090 """Check prerequisites.
4092 This checks that the instance is in the cluster.
4095 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4096 assert self.instance is not None, \
4097 "Cannot retrieve locked instance %s" % self.op.instance_name
4098 _CheckNodeOnline(self, self.instance.primary_node)
4100 def Exec(self, feedback_fn):
4101 """Activate the disks.
4104 disks_ok, disks_info = \
4105 _AssembleInstanceDisks(self, self.instance,
4106 ignore_size=self.op.ignore_size)
4108 raise errors.OpExecError("Cannot activate block devices")
4113 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4115 """Prepare the block devices for an instance.
4117 This sets up the block devices on all nodes.
4119 @type lu: L{LogicalUnit}
4120 @param lu: the logical unit on whose behalf we execute
4121 @type instance: L{objects.Instance}
4122 @param instance: the instance for whose disks we assemble
4123 @type disks: list of L{objects.Disk} or None
4124 @param disks: which disks to assemble (or all, if None)
4125 @type ignore_secondaries: boolean
4126 @param ignore_secondaries: if true, errors on secondary nodes
4127 won't result in an error return from the function
4128 @type ignore_size: boolean
4129 @param ignore_size: if true, the current known size of the disk
4130 will not be used during the disk activation, useful for cases
4131 when the size is wrong
4132 @return: a pair (status, device info); status is False if the operation
4133 failed, and device info is a list of (host, instance_visible_name,
4134 node_visible_name) tuples with the mapping from node devices to instance devices
4139 iname = instance.name
4140 disks = _ExpandCheckDisks(instance, disks)
4142 # With the two-pass mechanism we try to reduce the window of
4143 # opportunity for the race condition of switching DRBD to primary
4144 # before handshaking occurred, but we do not eliminate it
4146 # The proper fix would be to wait (with some limits) until the
4147 # connection has been made and drbd transitions from WFConnection
4148 # into any other network-connected state (Connected, SyncTarget,
4151 # 1st pass, assemble on all nodes in secondary mode
4152 for inst_disk in disks:
4153 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4155 node_disk = node_disk.Copy()
4156 node_disk.UnsetSize()
4157 lu.cfg.SetDiskID(node_disk, node)
4158 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4159 msg = result.fail_msg
4161 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4162 " (is_primary=False, pass=1): %s",
4163 inst_disk.iv_name, node, msg)
4164 if not ignore_secondaries:
4167 # FIXME: race condition on drbd migration to primary
4169 # 2nd pass, do only the primary node
4170 for inst_disk in disks:
4173 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4174 if node != instance.primary_node:
4177 node_disk = node_disk.Copy()
4178 node_disk.UnsetSize()
4179 lu.cfg.SetDiskID(node_disk, node)
4180 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4181 msg = result.fail_msg
4183 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4184 " (is_primary=True, pass=2): %s",
4185 inst_disk.iv_name, node, msg)
4188 dev_path = result.payload
4190 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4192 # leave the disks configured for the primary node
4193 # this is a workaround that would be fixed better by
4194 # improving the logical/physical id handling
4196 lu.cfg.SetDiskID(disk, instance.primary_node)
4198 return disks_ok, device_info
4201 def _StartInstanceDisks(lu, instance, force):
4202 """Start the disks of an instance.
4205 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4206 ignore_secondaries=force)
4208 _ShutdownInstanceDisks(lu, instance)
4209 if force is not None and not force:
4210 lu.proc.LogWarning("", hint="If the message above refers to a secondary node,"
4212 " you can retry the operation using '--force'.")
4213 raise errors.OpExecError("Disk consistency error")
4216 class LUDeactivateInstanceDisks(NoHooksLU):
4217 """Shutdown an instance's disks.
4220 _OP_REQP = [("instance_name", _TNonEmptyString)]
4223 def ExpandNames(self):
4224 self._ExpandAndLockInstance()
4225 self.needed_locks[locking.LEVEL_NODE] = []
4226 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4228 def DeclareLocks(self, level):
4229 if level == locking.LEVEL_NODE:
4230 self._LockInstancesNodes()
4232 def CheckPrereq(self):
4233 """Check prerequisites.
4235 This checks that the instance is in the cluster.
4238 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4239 assert self.instance is not None, \
4240 "Cannot retrieve locked instance %s" % self.op.instance_name
4242 def Exec(self, feedback_fn):
4243 """Deactivate the disks
4246 instance = self.instance
4247 _SafeShutdownInstanceDisks(self, instance)
4250 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4251 """Shutdown block devices of an instance.
4253 This function checks if an instance is running, before calling
4254 _ShutdownInstanceDisks.
4257 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4258 _ShutdownInstanceDisks(lu, instance, disks=disks)
4261 def _ExpandCheckDisks(instance, disks):
4262 """Return the instance disks selected by the disks list
4264 @type disks: list of L{objects.Disk} or None
4265 @param disks: selected disks
4266 @rtype: list of L{objects.Disk}
4267 @return: selected instance disks to act on
4271 return instance.disks
4273 if not set(disks).issubset(instance.disks):
4274 raise errors.ProgrammerError("Can only act on disks belonging to the"
4279 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4280 """Shutdown block devices of an instance.
4282 This does the shutdown on all nodes of the instance.
4284 If ignore_primary is false, errors on the primary node cause the shutdown to be reported as failed; otherwise they are only logged.
4289 disks = _ExpandCheckDisks(instance, disks)
4292 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4293 lu.cfg.SetDiskID(top_disk, node)
4294 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4295 msg = result.fail_msg
4297 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4298 disk.iv_name, node, msg)
4299 if not ignore_primary or node != instance.primary_node:
4304 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4305 """Checks if a node has enough free memory.
4307 This function checks if a given node has the needed amount of free
4308 memory. In case the node has less memory or we cannot get the
4309 information from the node, this function raises an OpPrereqError
4312 @type lu: C{LogicalUnit}
4313 @param lu: a logical unit from which we get configuration data
4315 @param node: the node to check
4316 @type reason: C{str}
4317 @param reason: string to use in the error message
4318 @type requested: C{int}
4319 @param requested: the amount of memory in MiB to check for
4320 @type hypervisor_name: C{str}
4321 @param hypervisor_name: the hypervisor to ask for memory stats
4322 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4323 we cannot check the node
4326 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4327 nodeinfo[node].Raise("Can't get data from node %s" % node,
4328 prereq=True, ecode=errors.ECODE_ENVIRON)
4329 free_mem = nodeinfo[node].payload.get('memory_free', None)
4330 if not isinstance(free_mem, int):
4331 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4332 " was '%s'" % (node, free_mem),
4333 errors.ECODE_ENVIRON)
4334 if requested > free_mem:
4335 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4336 " needed %s MiB, available %s MiB" %
4337 (node, reason, requested, free_mem),
4341 def _CheckNodesFreeDisk(lu, nodenames, requested):
4342 """Checks if nodes have enough free disk space in the default VG.
4344 This function checks if all given nodes have the needed amount of
4345 free disk. In case any node has less disk or we cannot get the
4346 information from the node, this function raises an OpPrereqError
4349 @type lu: C{LogicalUnit}
4350 @param lu: a logical unit from which we get configuration data
4351 @type nodenames: C{list}
4352 @param nodenames: the list of node names to check
4353 @type requested: C{int}
4354 @param requested: the amount of disk in MiB to check for
4355 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4356 we cannot check the node
4359 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4360 lu.cfg.GetHypervisorType())
4361 for node in nodenames:
4362 info = nodeinfo[node]
4363 info.Raise("Cannot get current information from node %s" % node,
4364 prereq=True, ecode=errors.ECODE_ENVIRON)
4365 vg_free = info.payload.get("vg_free", None)
4366 if not isinstance(vg_free, int):
4367 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4368 " result was '%s'" % (node, vg_free),
4369 errors.ECODE_ENVIRON)
4370 if requested > vg_free:
4371 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4372 " required %d MiB, available %d MiB" %
4373 (node, requested, vg_free),
4377 class LUStartupInstance(LogicalUnit):
4378 """Starts an instance.
4381 HPATH = "instance-start"
4382 HTYPE = constants.HTYPE_INSTANCE
4384 ("instance_name", _TNonEmptyString),
4386 ("beparams", _TDict),
4387 ("hvparams", _TDict),
4390 ("beparams", _EmptyDict),
4391 ("hvparams", _EmptyDict),
4395 def CheckArguments(self):
4397 if self.op.beparams:
4398 # fill the beparams dict
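# (ForceDictType also coerces values that may arrive as strings from the
# client into the expected parameter types)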
4399 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4401 def ExpandNames(self):
4402 self._ExpandAndLockInstance()
4404 def BuildHooksEnv(self):
4407 This runs on master, primary and secondary nodes of the instance.
4411 "FORCE": self.op.force,
4413 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4414 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4417 def CheckPrereq(self):
4418 """Check prerequisites.
4420 This checks that the instance is in the cluster.
4423 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4424 assert self.instance is not None, \
4425 "Cannot retrieve locked instance %s" % self.op.instance_name
4428 if self.op.hvparams:
4429 # check hypervisor parameter syntax (locally)
4430 cluster = self.cfg.GetClusterInfo()
4431 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
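# build the complete parameter set by filling in the cluster/instance
# defaults and layering the temporary overrides on top, so that the
# syntax check below sees every parameter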
4432 filled_hvp = cluster.FillHV(instance)
4433 filled_hvp.update(self.op.hvparams)
4434 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4435 hv_type.CheckParameterSyntax(filled_hvp)
4436 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4438 _CheckNodeOnline(self, instance.primary_node)
4440 bep = self.cfg.GetClusterInfo().FillBE(instance)
4441 # check bridges existence
4442 _CheckInstanceBridgesExist(self, instance)
4444 remote_info = self.rpc.call_instance_info(instance.primary_node,
4446 instance.hypervisor)
4447 remote_info.Raise("Error checking node %s" % instance.primary_node,
4448 prereq=True, ecode=errors.ECODE_ENVIRON)
4449 if not remote_info.payload: # not running already
4450 _CheckNodeFreeMemory(self, instance.primary_node,
4451 "starting instance %s" % instance.name,
4452 bep[constants.BE_MEMORY], instance.hypervisor)
4454 def Exec(self, feedback_fn):
4455 """Start the instance.
4458 instance = self.instance
4459 force = self.op.force
4461 self.cfg.MarkInstanceUp(instance.name)
4463 node_current = instance.primary_node
4465 _StartInstanceDisks(self, instance, force)
4467 result = self.rpc.call_instance_start(node_current, instance,
4468 self.op.hvparams, self.op.beparams)
4469 msg = result.fail_msg
4471 _ShutdownInstanceDisks(self, instance)
4472 raise errors.OpExecError("Could not start instance: %s" % msg)
4475 class LURebootInstance(LogicalUnit):
4476 """Reboot an instance.
4479 HPATH = "instance-reboot"
4480 HTYPE = constants.HTYPE_INSTANCE
4482 ("instance_name", _TNonEmptyString),
4483 ("ignore_secondaries", _TBool),
4484 ("reboot_type", _TElemOf(constants.REBOOT_TYPES)),
4486 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4489 def ExpandNames(self):
4490 self._ExpandAndLockInstance()
4492 def BuildHooksEnv(self):
4495 This runs on master, primary and secondary nodes of the instance.
4499 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4500 "REBOOT_TYPE": self.op.reboot_type,
4501 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4503 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4504 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4507 def CheckPrereq(self):
4508 """Check prerequisites.
4510 This checks that the instance is in the cluster.
4513 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4514 assert self.instance is not None, \
4515 "Cannot retrieve locked instance %s" % self.op.instance_name
4517 _CheckNodeOnline(self, instance.primary_node)
4519 # check bridges existence
4520 _CheckInstanceBridgesExist(self, instance)
4522 def Exec(self, feedback_fn):
4523 """Reboot the instance.
4526 instance = self.instance
4527 ignore_secondaries = self.op.ignore_secondaries
4528 reboot_type = self.op.reboot_type
4530 node_current = instance.primary_node
4532 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4533 constants.INSTANCE_REBOOT_HARD]:
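# soft/hard reboots are performed by the hypervisor on the primary node;
# set the disk IDs for that node first so the backend can find the devices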
4534 for disk in instance.disks:
4535 self.cfg.SetDiskID(disk, node_current)
4536 result = self.rpc.call_instance_reboot(node_current, instance,
4538 self.op.shutdown_timeout)
4539 result.Raise("Could not reboot instance")
4541 result = self.rpc.call_instance_shutdown(node_current, instance,
4542 self.op.shutdown_timeout)
4543 result.Raise("Could not shutdown instance for full reboot")
4544 _ShutdownInstanceDisks(self, instance)
4545 _StartInstanceDisks(self, instance, ignore_secondaries)
4546 result = self.rpc.call_instance_start(node_current, instance, None, None)
4547 msg = result.fail_msg
4549 _ShutdownInstanceDisks(self, instance)
4550 raise errors.OpExecError("Could not start instance for"
4551 " full reboot: %s" % msg)
4553 self.cfg.MarkInstanceUp(instance.name)
4556 class LUShutdownInstance(LogicalUnit):
4557 """Shutdown an instance.
4560 HPATH = "instance-stop"
4561 HTYPE = constants.HTYPE_INSTANCE
4562 _OP_REQP = [("instance_name", _TNonEmptyString)]
4563 _OP_DEFS = [("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4566 def ExpandNames(self):
4567 self._ExpandAndLockInstance()
4569 def BuildHooksEnv(self):
4572 This runs on master, primary and secondary nodes of the instance.
4575 env = _BuildInstanceHookEnvByObject(self, self.instance)
4576 env["TIMEOUT"] = self.op.timeout
4577 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4580 def CheckPrereq(self):
4581 """Check prerequisites.
4583 This checks that the instance is in the cluster.
4586 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4587 assert self.instance is not None, \
4588 "Cannot retrieve locked instance %s" % self.op.instance_name
4589 _CheckNodeOnline(self, self.instance.primary_node)
4591 def Exec(self, feedback_fn):
4592 """Shutdown the instance.
4595 instance = self.instance
4596 node_current = instance.primary_node
4597 timeout = self.op.timeout
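# mark the instance as administratively down before issuing the shutdown
# RPC, so that even if the call fails the cluster no longer considers the
# instance as supposed to be running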
4598 self.cfg.MarkInstanceDown(instance.name)
4599 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4600 msg = result.fail_msg
4602 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4604 _ShutdownInstanceDisks(self, instance)
4607 class LUReinstallInstance(LogicalUnit):
4608 """Reinstall an instance.
4611 HPATH = "instance-reinstall"
4612 HTYPE = constants.HTYPE_INSTANCE
4613 _OP_REQP = [("instance_name", _TNonEmptyString)]
4616 ("force_variant", False),
4620 def ExpandNames(self):
4621 self._ExpandAndLockInstance()
4623 def BuildHooksEnv(self):
4626 This runs on master, primary and secondary nodes of the instance.
4629 env = _BuildInstanceHookEnvByObject(self, self.instance)
4630 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4633 def CheckPrereq(self):
4634 """Check prerequisites.
4636 This checks that the instance is in the cluster and is not running.
4639 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4640 assert instance is not None, \
4641 "Cannot retrieve locked instance %s" % self.op.instance_name
4642 _CheckNodeOnline(self, instance.primary_node)
4644 if instance.disk_template == constants.DT_DISKLESS:
4645 raise errors.OpPrereqError("Instance '%s' has no disks" %
4646 self.op.instance_name,
4648 _CheckInstanceDown(self, instance, "cannot reinstall")
4650 if self.op.os_type is not None:
4652 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4653 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4655 self.instance = instance
4657 def Exec(self, feedback_fn):
4658 """Reinstall the instance.
4661 inst = self.instance
4663 if self.op.os_type is not None:
4664 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4665 inst.os = self.op.os_type
4666 self.cfg.Update(inst, feedback_fn)
4668 _StartInstanceDisks(self, inst, None)
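# the OS create scripts need the instance's disks to be visible on the
# primary node, hence the activation above; they are shut down again once
# the reinstall finishes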
4670 feedback_fn("Running the instance OS create scripts...")
4671 # FIXME: pass debug option from opcode to backend
4672 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4673 self.op.debug_level)
4674 result.Raise("Could not install OS for instance %s on node %s" %
4675 (inst.name, inst.primary_node))
4677 _ShutdownInstanceDisks(self, inst)
4680 class LURecreateInstanceDisks(LogicalUnit):
4681 """Recreate an instance's missing disks.
4684 HPATH = "instance-recreate-disks"
4685 HTYPE = constants.HTYPE_INSTANCE
4687 ("instance_name", _TNonEmptyString),
4688 ("disks", _TListOf(_TPositiveInt)),
4692 def ExpandNames(self):
4693 self._ExpandAndLockInstance()
4695 def BuildHooksEnv(self):
4698 This runs on master, primary and secondary nodes of the instance.
4701 env = _BuildInstanceHookEnvByObject(self, self.instance)
4702 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4705 def CheckPrereq(self):
4706 """Check prerequisites.
4708 This checks that the instance is in the cluster and is not running.
4711 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4712 assert instance is not None, \
4713 "Cannot retrieve locked instance %s" % self.op.instance_name
4714 _CheckNodeOnline(self, instance.primary_node)
4716 if instance.disk_template == constants.DT_DISKLESS:
4717 raise errors.OpPrereqError("Instance '%s' has no disks" %
4718 self.op.instance_name, errors.ECODE_INVAL)
4719 _CheckInstanceDown(self, instance, "cannot recreate disks")
4721 if not self.op.disks:
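# an empty disk list means "recreate all of the instance's disks"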
4722 self.op.disks = range(len(instance.disks))
4724 for idx in self.op.disks:
4725 if idx >= len(instance.disks):
4726 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4729 self.instance = instance
4731 def Exec(self, feedback_fn):
4732 """Recreate the disks.
4736 for idx, _ in enumerate(self.instance.disks):
4737 if idx not in self.op.disks: # disk idx has not been passed in
4741 _CreateDisks(self, self.instance, to_skip=to_skip)
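# only the requested disk indices are recreated; everything else is
# passed to _CreateDisks via to_skip and left untouched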
4744 class LURenameInstance(LogicalUnit):
4745 """Rename an instance.
4748 HPATH = "instance-rename"
4749 HTYPE = constants.HTYPE_INSTANCE
4751 ("instance_name", _TNonEmptyString),
4752 ("new_name", _TNonEmptyString),
4753 ("ignore_ip", _TBool),
4754 ("check_name", _TBool),
4756 _OP_DEFS = [("ignore_ip", False), ("check_name", True)]
4758 def BuildHooksEnv(self):
4761 This runs on master, primary and secondary nodes of the instance.
4764 env = _BuildInstanceHookEnvByObject(self, self.instance)
4765 env["INSTANCE_NEW_NAME"] = self.op.new_name
4766 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4769 def CheckPrereq(self):
4770 """Check prerequisites.
4772 This checks that the instance is in the cluster and is not running.
4775 self.op.instance_name = _ExpandInstanceName(self.cfg,
4776 self.op.instance_name)
4777 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4778 assert instance is not None
4779 _CheckNodeOnline(self, instance.primary_node)
4780 _CheckInstanceDown(self, instance, "cannot rename")
4781 self.instance = instance
4783 # new name verification
4784 if self.op.check_name:
4785 name_info = utils.GetHostInfo(self.op.new_name)
4786 self.op.new_name = name_info.name
4788 new_name = self.op.new_name
4790 instance_list = self.cfg.GetInstanceList()
4791 if new_name in instance_list:
4792 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4793 new_name, errors.ECODE_EXISTS)
4795 if not self.op.ignore_ip:
4796 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4797 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4798 (name_info.ip, new_name),
4799 errors.ECODE_NOTUNIQUE)
4801 def Exec(self, feedback_fn):
4802 """Reinstall the instance.
4805 inst = self.instance
4806 old_name = inst.name
4808 if inst.disk_template == constants.DT_FILE:
4809 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4811 self.cfg.RenameInstance(inst.name, self.op.new_name)
4812 # Change the instance lock. This is definitely safe while we hold the BGL
4813 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4814 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4816 # re-read the instance from the configuration after rename
4817 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4819 if inst.disk_template == constants.DT_FILE:
4820 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4821 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4822 old_file_storage_dir,
4823 new_file_storage_dir)
4824 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4825 " (but the instance has been renamed in Ganeti)" %
4826 (inst.primary_node, old_file_storage_dir,
4827 new_file_storage_dir))
4829 _StartInstanceDisks(self, inst, None)
4831 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4832 old_name, self.op.debug_level)
4833 msg = result.fail_msg
4835 msg = ("Could not run OS rename script for instance %s on node %s"
4836 " (but the instance has been renamed in Ganeti): %s" %
4837 (inst.name, inst.primary_node, msg))
4838 self.proc.LogWarning(msg)
4840 _ShutdownInstanceDisks(self, inst)
4843 class LURemoveInstance(LogicalUnit):
4844 """Remove an instance.
4847 HPATH = "instance-remove"
4848 HTYPE = constants.HTYPE_INSTANCE
4850 ("instance_name", _TNonEmptyString),
4851 ("ignore_failures", _TBool),
4853 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4856 def ExpandNames(self):
4857 self._ExpandAndLockInstance()
4858 self.needed_locks[locking.LEVEL_NODE] = []
4859 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4861 def DeclareLocks(self, level):
4862 if level == locking.LEVEL_NODE:
4863 self._LockInstancesNodes()
4865 def BuildHooksEnv(self):
4868 This runs on master, primary and secondary nodes of the instance.
4871 env = _BuildInstanceHookEnvByObject(self, self.instance)
4872 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4873 nl = [self.cfg.GetMasterNode()]
4874 nl_post = list(self.instance.all_nodes) + nl
4875 return env, nl, nl_post
4877 def CheckPrereq(self):
4878 """Check prerequisites.
4880 This checks that the instance is in the cluster.
4883 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4884 assert self.instance is not None, \
4885 "Cannot retrieve locked instance %s" % self.op.instance_name
4887 def Exec(self, feedback_fn):
4888 """Remove the instance.
4891 instance = self.instance
4892 logging.info("Shutting down instance %s on node %s",
4893 instance.name, instance.primary_node)
4895 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4896 self.op.shutdown_timeout)
4897 msg = result.fail_msg
4899 if self.op.ignore_failures:
4900 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4902 raise errors.OpExecError("Could not shutdown instance %s on"
4904 (instance.name, instance.primary_node, msg))
4906 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4909 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4910 """Utility function to remove an instance.
4913 logging.info("Removing block devices for instance %s", instance.name)
4915 if not _RemoveDisks(lu, instance):
4916 if not ignore_failures:
4917 raise errors.OpExecError("Can't remove instance's disks")
4918 feedback_fn("Warning: can't remove instance's disks")
4920 logging.info("Removing instance %s out of cluster config", instance.name)
4922 lu.cfg.RemoveInstance(instance.name)
4924 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4925 "Instance lock removal conflict"
4927 # Remove lock for the instance
4928 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4931 class LUQueryInstances(NoHooksLU):
4932 """Logical unit for querying instances.
4935 # pylint: disable-msg=W0142
4937 ("output_fields", _TListOf(_TNonEmptyString)),
4938 ("names", _TListOf(_TNonEmptyString)),
4939 ("use_locking", _TBool),
4942 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4943 "serial_no", "ctime", "mtime", "uuid"]
4944 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4946 "disk_template", "ip", "mac", "bridge",
4947 "nic_mode", "nic_link",
4948 "sda_size", "sdb_size", "vcpus", "tags",
4949 "network_port", "beparams",
4950 r"(disk)\.(size)/([0-9]+)",
4951 r"(disk)\.(sizes)", "disk_usage",
4952 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4953 r"(nic)\.(bridge)/([0-9]+)",
4954 r"(nic)\.(macs|ips|modes|links|bridges)",
4955 r"(disk|nic)\.(count)",
4957 ] + _SIMPLE_FIELDS +
4959 for name in constants.HVS_PARAMETERS
4960 if name not in constants.HVC_GLOBALS] +
4962 for name in constants.BES_PARAMETERS])
4963 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4966 def CheckArguments(self):
4967 _CheckOutputFields(static=self._FIELDS_STATIC,
4968 dynamic=self._FIELDS_DYNAMIC,
4969 selected=self.op.output_fields)
4971 def ExpandNames(self):
4972 self.needed_locks = {}
4973 self.share_locks[locking.LEVEL_INSTANCE] = 1
4974 self.share_locks[locking.LEVEL_NODE] = 1
4977 self.wanted = _GetWantedInstances(self, self.op.names)
4979 self.wanted = locking.ALL_SET
4981 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
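# live data from the nodes (and therefore locking) is only needed if at
# least one requested field is not a purely static/config field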
4982 self.do_locking = self.do_node_query and self.op.use_locking
4984 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4985 self.needed_locks[locking.LEVEL_NODE] = []
4986 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4988 def DeclareLocks(self, level):
4989 if level == locking.LEVEL_NODE and self.do_locking:
4990 self._LockInstancesNodes()
4992 def Exec(self, feedback_fn):
4993 """Computes the list of nodes and their attributes.
4996 # pylint: disable-msg=R0912
4997 # way too many branches here
4998 all_info = self.cfg.GetAllInstancesInfo()
4999 if self.wanted == locking.ALL_SET:
5000 # caller didn't specify instance names, so ordering is not important
5002 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5004 instance_names = all_info.keys()
5005 instance_names = utils.NiceSort(instance_names)
5007 # caller did specify names, so we must keep the ordering
5009 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5011 tgt_set = all_info.keys()
5012 missing = set(self.wanted).difference(tgt_set)
5014 raise errors.OpExecError("Some instances were removed before"
5015 " retrieving their data: %s" % missing)
5016 instance_names = self.wanted
5018 instance_list = [all_info[iname] for iname in instance_names]
5020 # begin data gathering
5022 nodes = frozenset([inst.primary_node for inst in instance_list])
5023 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5027 if self.do_node_query:
5029 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5031 result = node_data[name]
5033 # offline nodes will be in both lists
5034 off_nodes.append(name)
5036 bad_nodes.append(name)
5039 live_data.update(result.payload)
5040 # else no instance is alive
5042 live_data = dict([(name, {}) for name in instance_names])
5044 # end data gathering
5049 cluster = self.cfg.GetClusterInfo()
5050 for instance in instance_list:
5052 i_hv = cluster.FillHV(instance, skip_globals=True)
5053 i_be = cluster.FillBE(instance)
5054 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5055 for field in self.op.output_fields:
5056 st_match = self._FIELDS_STATIC.Matches(field)
5057 if field in self._SIMPLE_FIELDS:
5058 val = getattr(instance, field)
5059 elif field == "pnode":
5060 val = instance.primary_node
5061 elif field == "snodes":
5062 val = list(instance.secondary_nodes)
5063 elif field == "admin_state":
5064 val = instance.admin_up
5065 elif field == "oper_state":
5066 if instance.primary_node in bad_nodes:
5069 val = bool(live_data.get(instance.name))
5070 elif field == "status":
5071 if instance.primary_node in off_nodes:
5072 val = "ERROR_nodeoffline"
5073 elif instance.primary_node in bad_nodes:
5074 val = "ERROR_nodedown"
5076 running = bool(live_data.get(instance.name))
5078 if instance.admin_up:
5083 if instance.admin_up:
5087 elif field == "oper_ram":
5088 if instance.primary_node in bad_nodes:
5090 elif instance.name in live_data:
5091 val = live_data[instance.name].get("memory", "?")
5094 elif field == "vcpus":
5095 val = i_be[constants.BE_VCPUS]
5096 elif field == "disk_template":
5097 val = instance.disk_template
5100 val = instance.nics[0].ip
5103 elif field == "nic_mode":
5105 val = i_nicp[0][constants.NIC_MODE]
5108 elif field == "nic_link":
5110 val = i_nicp[0][constants.NIC_LINK]
5113 elif field == "bridge":
5114 if (instance.nics and
5115 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5116 val = i_nicp[0][constants.NIC_LINK]
5119 elif field == "mac":
5121 val = instance.nics[0].mac
5124 elif field == "sda_size" or field == "sdb_size":
5125 idx = ord(field[2]) - ord('a')
5127 val = instance.FindDisk(idx).size
5128 except errors.OpPrereqError:
5130 elif field == "disk_usage": # total disk usage per node
5131 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5132 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5133 elif field == "tags":
5134 val = list(instance.GetTags())
5135 elif field == "hvparams":
5137 elif (field.startswith(HVPREFIX) and
5138 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5139 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5140 val = i_hv.get(field[len(HVPREFIX):], None)
5141 elif field == "beparams":
5143 elif (field.startswith(BEPREFIX) and
5144 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5145 val = i_be.get(field[len(BEPREFIX):], None)
5146 elif st_match and st_match.groups():
5147 # matches a variable list
5148 st_groups = st_match.groups()
5149 if st_groups and st_groups[0] == "disk":
5150 if st_groups[1] == "count":
5151 val = len(instance.disks)
5152 elif st_groups[1] == "sizes":
5153 val = [disk.size for disk in instance.disks]
5154 elif st_groups[1] == "size":
5156 val = instance.FindDisk(st_groups[2]).size
5157 except errors.OpPrereqError:
5160 assert False, "Unhandled disk parameter"
5161 elif st_groups[0] == "nic":
5162 if st_groups[1] == "count":
5163 val = len(instance.nics)
5164 elif st_groups[1] == "macs":
5165 val = [nic.mac for nic in instance.nics]
5166 elif st_groups[1] == "ips":
5167 val = [nic.ip for nic in instance.nics]
5168 elif st_groups[1] == "modes":
5169 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5170 elif st_groups[1] == "links":
5171 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5172 elif st_groups[1] == "bridges":
5175 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5176 val.append(nicp[constants.NIC_LINK])
5181 nic_idx = int(st_groups[2])
5182 if nic_idx >= len(instance.nics):
5185 if st_groups[1] == "mac":
5186 val = instance.nics[nic_idx].mac
5187 elif st_groups[1] == "ip":
5188 val = instance.nics[nic_idx].ip
5189 elif st_groups[1] == "mode":
5190 val = i_nicp[nic_idx][constants.NIC_MODE]
5191 elif st_groups[1] == "link":
5192 val = i_nicp[nic_idx][constants.NIC_LINK]
5193 elif st_groups[1] == "bridge":
5194 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5195 if nic_mode == constants.NIC_MODE_BRIDGED:
5196 val = i_nicp[nic_idx][constants.NIC_LINK]
5200 assert False, "Unhandled NIC parameter"
5202 assert False, ("Declared but unhandled variable parameter '%s'" %
5205 assert False, "Declared but unhandled parameter '%s'" % field
5212 class LUFailoverInstance(LogicalUnit):
5213 """Failover an instance.
5216 HPATH = "instance-failover"
5217 HTYPE = constants.HTYPE_INSTANCE
5219 ("instance_name", _TNonEmptyString),
5220 ("ignore_consistency", _TBool),
5222 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5225 def ExpandNames(self):
5226 self._ExpandAndLockInstance()
5227 self.needed_locks[locking.LEVEL_NODE] = []
5228 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5230 def DeclareLocks(self, level):
5231 if level == locking.LEVEL_NODE:
5232 self._LockInstancesNodes()
5234 def BuildHooksEnv(self):
5237 This runs on master, primary and secondary nodes of the instance.
5240 instance = self.instance
5241 source_node = instance.primary_node
5242 target_node = instance.secondary_nodes[0]
5244 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5245 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5246 "OLD_PRIMARY": source_node,
5247 "OLD_SECONDARY": target_node,
5248 "NEW_PRIMARY": target_node,
5249 "NEW_SECONDARY": source_node,
5251 env.update(_BuildInstanceHookEnvByObject(self, instance))
5252 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5254 nl_post.append(source_node)
5255 return env, nl, nl_post
5257 def CheckPrereq(self):
5258 """Check prerequisites.
5260 This checks that the instance is in the cluster.
5263 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5264 assert self.instance is not None, \
5265 "Cannot retrieve locked instance %s" % self.op.instance_name
5267 bep = self.cfg.GetClusterInfo().FillBE(instance)
5268 if instance.disk_template not in constants.DTS_NET_MIRROR:
5269 raise errors.OpPrereqError("Instance's disk layout is not"
5270 " network mirrored, cannot failover.",
5273 secondary_nodes = instance.secondary_nodes
5274 if not secondary_nodes:
5275 raise errors.ProgrammerError("no secondary node but using "
5276 "a mirrored disk template")
5278 target_node = secondary_nodes[0]
5279 _CheckNodeOnline(self, target_node)
5280 _CheckNodeNotDrained(self, target_node)
5281 if instance.admin_up:
5282 # check memory requirements on the secondary node
5283 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5284 instance.name, bep[constants.BE_MEMORY],
5285 instance.hypervisor)
5287 self.LogInfo("Not checking memory on the secondary node as"
5288 " instance will not be started")
5290 # check bridge existence
5291 _CheckInstanceBridgesExist(self, instance, node=target_node)
5293 def Exec(self, feedback_fn):
5294 """Failover an instance.
5296 The failover is done by shutting it down on its present node and
5297 starting it on the secondary.
5300 instance = self.instance
5302 source_node = instance.primary_node
5303 target_node = instance.secondary_nodes[0]
5305 if instance.admin_up:
5306 feedback_fn("* checking disk consistency between source and target")
5307 for dev in instance.disks:
5308 # for drbd, these are drbd over lvm
5309 if not _CheckDiskConsistency(self, dev, target_node, False):
5310 if not self.op.ignore_consistency:
5311 raise errors.OpExecError("Disk %s is degraded on target node,"
5312 " aborting failover." % dev.iv_name)
5314 feedback_fn("* not checking disk consistency as instance is not running")
5316 feedback_fn("* shutting down instance on source node")
5317 logging.info("Shutting down instance %s on node %s",
5318 instance.name, source_node)
5320 result = self.rpc.call_instance_shutdown(source_node, instance,
5321 self.op.shutdown_timeout)
5322 msg = result.fail_msg
5324 if self.op.ignore_consistency:
5325 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5326 " Proceeding anyway. Please make sure node"
5327 " %s is down. Error details: %s",
5328 instance.name, source_node, source_node, msg)
5330 raise errors.OpExecError("Could not shutdown instance %s on"
5332 (instance.name, source_node, msg))
5334 feedback_fn("* deactivating the instance's disks on source node")
5335 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5336 raise errors.OpExecError("Can't shut down the instance's disks.")
5338 instance.primary_node = target_node
5339 # distribute new instance config to the other nodes
5340 self.cfg.Update(instance, feedback_fn)
5342 # Only start the instance if it's marked as up
5343 if instance.admin_up:
5344 feedback_fn("* activating the instance's disks on target node")
5345 logging.info("Starting instance %s on node %s",
5346 instance.name, target_node)
5348 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5349 ignore_secondaries=True)
5351 _ShutdownInstanceDisks(self, instance)
5352 raise errors.OpExecError("Can't activate the instance's disks")
5354 feedback_fn("* starting the instance on the target node")
5355 result = self.rpc.call_instance_start(target_node, instance, None, None)
5356 msg = result.fail_msg
5358 _ShutdownInstanceDisks(self, instance)
5359 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5360 (instance.name, target_node, msg))
5363 class LUMigrateInstance(LogicalUnit):
5364 """Migrate an instance.
5366 This is migration without shutting the instance down, as opposed to
5367 failover, which requires the instance to be shut down.
5370 HPATH = "instance-migrate"
5371 HTYPE = constants.HTYPE_INSTANCE
5373 ("instance_name", _TNonEmptyString),
5375 ("cleanup", _TBool),
5380 def ExpandNames(self):
5381 self._ExpandAndLockInstance()
5383 self.needed_locks[locking.LEVEL_NODE] = []
5384 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5386 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5387 self.op.live, self.op.cleanup)
5388 self.tasklets = [self._migrater]
5390 def DeclareLocks(self, level):
5391 if level == locking.LEVEL_NODE:
5392 self._LockInstancesNodes()
5394 def BuildHooksEnv(self):
5397 This runs on master, primary and secondary nodes of the instance.
5400 instance = self._migrater.instance
5401 source_node = instance.primary_node
5402 target_node = instance.secondary_nodes[0]
5403 env = _BuildInstanceHookEnvByObject(self, instance)
5404 env["MIGRATE_LIVE"] = self.op.live
5405 env["MIGRATE_CLEANUP"] = self.op.cleanup
5407 "OLD_PRIMARY": source_node,
5408 "OLD_SECONDARY": target_node,
5409 "NEW_PRIMARY": target_node,
5410 "NEW_SECONDARY": source_node,
5412 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5414 nl_post.append(source_node)
5415 return env, nl, nl_post
5418 class LUMoveInstance(LogicalUnit):
5419 """Move an instance by data-copying.
5422 HPATH = "instance-move"
5423 HTYPE = constants.HTYPE_INSTANCE
5425 ("instance_name", _TNonEmptyString),
5426 ("target_node", _TNonEmptyString),
5428 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5431 def ExpandNames(self):
5432 self._ExpandAndLockInstance()
5433 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5434 self.op.target_node = target_node
5435 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5436 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5438 def DeclareLocks(self, level):
5439 if level == locking.LEVEL_NODE:
5440 self._LockInstancesNodes(primary_only=True)
5442 def BuildHooksEnv(self):
5445 This runs on master, primary and secondary nodes of the instance.
5449 "TARGET_NODE": self.op.target_node,
5450 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5452 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5453 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5454 self.op.target_node]
5457 def CheckPrereq(self):
5458 """Check prerequisites.
5460 This checks that the instance is in the cluster.
5463 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5464 assert self.instance is not None, \
5465 "Cannot retrieve locked instance %s" % self.op.instance_name
5467 node = self.cfg.GetNodeInfo(self.op.target_node)
5468 assert node is not None, \
5469 "Cannot retrieve locked node %s" % self.op.target_node
5471 self.target_node = target_node = node.name
5473 if target_node == instance.primary_node:
5474 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5475 (instance.name, target_node),
5478 bep = self.cfg.GetClusterInfo().FillBE(instance)
5480 for idx, dsk in enumerate(instance.disks):
5481 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5482 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5483 " cannot copy" % idx, errors.ECODE_STATE)
5485 _CheckNodeOnline(self, target_node)
5486 _CheckNodeNotDrained(self, target_node)
5488 if instance.admin_up:
5489 # check memory requirements on the secondary node
5490 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5491 instance.name, bep[constants.BE_MEMORY],
5492 instance.hypervisor)
5494 self.LogInfo("Not checking memory on the secondary node as"
5495 " instance will not be started")
5497 # check bridge existence
5498 _CheckInstanceBridgesExist(self, instance, node=target_node)
5500 def Exec(self, feedback_fn):
5501 """Move an instance.
5503 The move is done by shutting it down on its present node, copying
5504 the data over (slow) and starting it on the new node.
5507 instance = self.instance
5509 source_node = instance.primary_node
5510 target_node = self.target_node
5512 self.LogInfo("Shutting down instance %s on source node %s",
5513 instance.name, source_node)
5515 result = self.rpc.call_instance_shutdown(source_node, instance,
5516 self.op.shutdown_timeout)
5517 msg = result.fail_msg
5519 if self.op.ignore_consistency:
5520 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5521 " Proceeding anyway. Please make sure node"
5522 " %s is down. Error details: %s",
5523 instance.name, source_node, source_node, msg)
5525 raise errors.OpExecError("Could not shutdown instance %s on"
5527 (instance.name, source_node, msg))
5529 # create the target disks
5531 _CreateDisks(self, instance, target_node=target_node)
5532 except errors.OpExecError:
5533 self.LogWarning("Device creation failed, reverting...")
5535 _RemoveDisks(self, instance, target_node=target_node)
5537 self.cfg.ReleaseDRBDMinors(instance.name)
5540 cluster_name = self.cfg.GetClusterInfo().cluster_name
5543 # activate, get path, copy the data over
5544 for idx, disk in enumerate(instance.disks):
5545 self.LogInfo("Copying data for disk %d", idx)
5546 result = self.rpc.call_blockdev_assemble(target_node, disk,
5547 instance.name, True)
5549 self.LogWarning("Can't assemble newly created disk %d: %s",
5550 idx, result.fail_msg)
5551 errs.append(result.fail_msg)
5553 dev_path = result.payload
5554 result = self.rpc.call_blockdev_export(source_node, disk,
5555 target_node, dev_path,
5558 self.LogWarning("Can't copy data over for disk %d: %s",
5559 idx, result.fail_msg)
5560 errs.append(result.fail_msg)
5564 self.LogWarning("Some disks failed to copy, aborting")
5566 _RemoveDisks(self, instance, target_node=target_node)
5568 self.cfg.ReleaseDRBDMinors(instance.name)
5569 raise errors.OpExecError("Errors during disk copy: %s" %
5572 instance.primary_node = target_node
5573 self.cfg.Update(instance, feedback_fn)
5575 self.LogInfo("Removing the disks on the original node")
5576 _RemoveDisks(self, instance, target_node=source_node)
5578 # Only start the instance if it's marked as up
5579 if instance.admin_up:
5580 self.LogInfo("Starting instance %s on node %s",
5581 instance.name, target_node)
5583 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5584 ignore_secondaries=True)
5586 _ShutdownInstanceDisks(self, instance)
5587 raise errors.OpExecError("Can't activate the instance's disks")
5589 result = self.rpc.call_instance_start(target_node, instance, None, None)
5590 msg = result.fail_msg
5592 _ShutdownInstanceDisks(self, instance)
5593 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5594 (instance.name, target_node, msg))
5597 class LUMigrateNode(LogicalUnit):
5598 """Migrate all instances from a node.
5601 HPATH = "node-migrate"
5602 HTYPE = constants.HTYPE_NODE
5604 ("node_name", _TNonEmptyString),
5609 def ExpandNames(self):
5610 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5612 self.needed_locks = {
5613 locking.LEVEL_NODE: [self.op.node_name],
5616 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5618 # Create tasklets for migrating instances for all instances on this node
5622 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5623 logging.debug("Migrating instance %s", inst.name)
5624 names.append(inst.name)
5626 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5628 self.tasklets = tasklets
5630 # Declare instance locks
5631 self.needed_locks[locking.LEVEL_INSTANCE] = names
5633 def DeclareLocks(self, level):
5634 if level == locking.LEVEL_NODE:
5635 self._LockInstancesNodes()
5637 def BuildHooksEnv(self):
5640 This runs on the master, the primary and all the secondaries.
5644 "NODE_NAME": self.op.node_name,
5647 nl = [self.cfg.GetMasterNode()]
5649 return (env, nl, nl)
5652 class TLMigrateInstance(Tasklet):
5653 def __init__(self, lu, instance_name, live, cleanup):
5654 """Initializes this class.
5657 Tasklet.__init__(self, lu)
5660 self.instance_name = instance_name
5662 self.cleanup = cleanup
5664 def CheckPrereq(self):
5665 """Check prerequisites.
5667 This checks that the instance is in the cluster.
5670 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5671 instance = self.cfg.GetInstanceInfo(instance_name)
5672 assert instance is not None
5674 if instance.disk_template != constants.DT_DRBD8:
5675 raise errors.OpPrereqError("Instance's disk layout is not"
5676 " drbd8, cannot migrate.", errors.ECODE_STATE)
5678 secondary_nodes = instance.secondary_nodes
5679 if not secondary_nodes:
5680 raise errors.ConfigurationError("No secondary node but using"
5681 " drbd8 disk template")
5683 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5685 target_node = secondary_nodes[0]
5686 # check memory requirements on the secondary node
5687 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5688 instance.name, i_be[constants.BE_MEMORY],
5689 instance.hypervisor)
5691 # check bridge existence
5692 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5694 if not self.cleanup:
5695 _CheckNodeNotDrained(self.lu, target_node)
5696 result = self.rpc.call_instance_migratable(instance.primary_node,
5698 result.Raise("Can't migrate, please use failover",
5699 prereq=True, ecode=errors.ECODE_STATE)
5701 self.instance = instance
5703 def _WaitUntilSync(self):
5704 """Poll with custom rpc for disk sync.
5706 This uses our own step-based rpc call.
5709 self.feedback_fn("* wait until resync is done")
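# poll all involved nodes and track the slowest disk; keep going until
# every node reports the resync as finished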
5713 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5715 self.instance.disks)
5717 for node, nres in result.items():
5718 nres.Raise("Cannot resync disks on node %s" % node)
5719 node_done, node_percent = nres.payload
5720 all_done = all_done and node_done
5721 if node_percent is not None:
5722 min_percent = min(min_percent, node_percent)
5724 if min_percent < 100:
5725 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5728 def _EnsureSecondary(self, node):
5729 """Demote a node to secondary.
5732 self.feedback_fn("* switching node %s to secondary mode" % node)
5734 for dev in self.instance.disks:
5735 self.cfg.SetDiskID(dev, node)
5737 result = self.rpc.call_blockdev_close(node, self.instance.name,
5738 self.instance.disks)
5739 result.Raise("Cannot change disk to secondary on node %s" % node)
5741 def _GoStandalone(self):
5742 """Disconnect from the network.
5745 self.feedback_fn("* changing into standalone mode")
5746 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5747 self.instance.disks)
5748 for node, nres in result.items():
5749 nres.Raise("Cannot disconnect disks node %s" % node)
5751 def _GoReconnect(self, multimaster):
5752 """Reconnect to the network.
5758 msg = "single-master"
5759 self.feedback_fn("* changing disks into %s mode" % msg)
5760 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5761 self.instance.disks,
5762 self.instance.name, multimaster)
5763 for node, nres in result.items():
5764 nres.Raise("Cannot change disks config on node %s" % node)
5766 def _ExecCleanup(self):
5767 """Try to cleanup after a failed migration.
5769 The cleanup is done by:
5770 - check that the instance is running only on one node
5771 (and update the config if needed)
5772 - change disks on its secondary node to secondary
5773 - wait until disks are fully synchronized
5774 - disconnect from the network
5775 - change disks into single-master mode
5776 - wait again until disks are fully synchronized
5779 instance = self.instance
5780 target_node = self.target_node
5781 source_node = self.source_node
5783 # check running on only one node
5784 self.feedback_fn("* checking where the instance actually runs"
5785 " (if this hangs, the hypervisor might be in"
5787 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5788 for node, result in ins_l.items():
5789 result.Raise("Can't contact node %s" % node)
5791 runningon_source = instance.name in ins_l[source_node].payload
5792 runningon_target = instance.name in ins_l[target_node].payload
5794 if runningon_source and runningon_target:
5795 raise errors.OpExecError("Instance seems to be running on two nodes,"
5796 " or the hypervisor is confused. You will have"
5797 " to ensure manually that it runs only on one"
5798 " and restart this operation.")
5800 if not (runningon_source or runningon_target):
5801 raise errors.OpExecError("Instance does not seem to be running at all."
5802 " In this case, it's safer to repair by"
5803 " running 'gnt-instance stop' to ensure disk"
5804 " shutdown, and then restarting it.")
5806 if runningon_target:
5807 # the migration has actually succeeded, we need to update the config
5808 self.feedback_fn("* instance running on secondary node (%s),"
5809 " updating config" % target_node)
5810 instance.primary_node = target_node
5811 self.cfg.Update(instance, self.feedback_fn)
5812 demoted_node = source_node
5814 self.feedback_fn("* instance confirmed to be running on its"
5815 " primary node (%s)" % source_node)
5816 demoted_node = target_node
5818 self._EnsureSecondary(demoted_node)
5820 self._WaitUntilSync()
5821 except errors.OpExecError:
5822 # we ignore here errors, since if the device is standalone, it
5823 # won't be able to sync
5825 self._GoStandalone()
5826 self._GoReconnect(False)
5827 self._WaitUntilSync()
5829 self.feedback_fn("* done")
5831 def _RevertDiskStatus(self):
5832 """Try to revert the disk status after a failed migration.
5835 target_node = self.target_node
5837 self._EnsureSecondary(target_node)
5838 self._GoStandalone()
5839 self._GoReconnect(False)
5840 self._WaitUntilSync()
5841 except errors.OpExecError, err:
5842 self.lu.LogWarning("Migration failed and I can't reconnect the"
5843 " drives: error '%s'\n"
5844 "Please look and recover the instance status" %
5847 def _AbortMigration(self):
5848 """Call the hypervisor code to abort a started migration.
5851 instance = self.instance
5852 target_node = self.target_node
5853 migration_info = self.migration_info
5855 abort_result = self.rpc.call_finalize_migration(target_node,
5859 abort_msg = abort_result.fail_msg
5861 logging.error("Aborting migration failed on target node %s: %s",
5862 target_node, abort_msg)
5863 # Don't raise an exception here, as we still have to try to revert the
5864 # disk status, even if this step failed.
5866 def _ExecMigration(self):
5867 """Migrate an instance.
5869 The migration is done by:
5870 - change the disks into dual-master mode
5871 - wait until disks are fully synchronized again
5872 - migrate the instance
5873 - change disks on the new secondary node (the old primary) to secondary
5874 - wait until disks are fully synchronized
5875 - change disks into single-master mode
5878 instance = self.instance
5879 target_node = self.target_node
5880 source_node = self.source_node
5882 self.feedback_fn("* checking disk consistency between source and target")
5883 for dev in instance.disks:
5884 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5885 raise errors.OpExecError("Disk %s is degraded or not fully"
5886 " synchronized on target node,"
5887 " aborting migrate." % dev.iv_name)
5889 # First get the migration information from the remote node
5890 result = self.rpc.call_migration_info(source_node, instance)
5891 msg = result.fail_msg
5893 log_err = ("Failed fetching source migration information from %s: %s" %
5895 logging.error(log_err)
5896 raise errors.OpExecError(log_err)
5898 self.migration_info = migration_info = result.payload
5900 # Then switch the disks to master/master mode
5901 self._EnsureSecondary(target_node)
5902 self._GoStandalone()
5903 self._GoReconnect(True)
5904 self._WaitUntilSync()
5906 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5907 result = self.rpc.call_accept_instance(target_node,
5910 self.nodes_ip[target_node])
5912 msg = result.fail_msg
5914 logging.error("Instance pre-migration failed, trying to revert"
5915 " disk status: %s", msg)
5916 self.feedback_fn("Pre-migration failed, aborting")
5917 self._AbortMigration()
5918 self._RevertDiskStatus()
5919 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5920 (instance.name, msg))
5922 self.feedback_fn("* migrating instance to %s" % target_node)
5924 result = self.rpc.call_instance_migrate(source_node, instance,
5925 self.nodes_ip[target_node],
5927 msg = result.fail_msg
5929 logging.error("Instance migration failed, trying to revert"
5930 " disk status: %s", msg)
5931 self.feedback_fn("Migration failed, aborting")
5932 self._AbortMigration()
5933 self._RevertDiskStatus()
5934 raise errors.OpExecError("Could not migrate instance %s: %s" %
5935 (instance.name, msg))
5938 instance.primary_node = target_node
5939 # distribute new instance config to the other nodes
5940 self.cfg.Update(instance, self.feedback_fn)
5942 result = self.rpc.call_finalize_migration(target_node,
5946 msg = result.fail_msg
5948 logging.error("Instance migration succeeded, but finalization failed:"
5950 raise errors.OpExecError("Could not finalize instance migration: %s" %
5953 self._EnsureSecondary(source_node)
5954 self._WaitUntilSync()
5955 self._GoStandalone()
5956 self._GoReconnect(False)
5957 self._WaitUntilSync()
5959 self.feedback_fn("* done")
5961 def Exec(self, feedback_fn):
5962 """Perform the migration.
5965 feedback_fn("Migrating instance %s" % self.instance.name)
5967 self.feedback_fn = feedback_fn
5969 self.source_node = self.instance.primary_node
5970 self.target_node = self.instance.secondary_nodes[0]
5971 self.all_nodes = [self.source_node, self.target_node]
5973 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5974 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5978 return self._ExecCleanup()
5980 return self._ExecMigration()
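
# Illustrative sketch, not part of the original module: the DRBD
# reconfiguration sequence the tasklet above applies around a live
# migration, written out as plain data. Each entry corresponds to one of
# the helpers used in _ExecMigration (_EnsureSecondary, _GoStandalone,
# _GoReconnect, _WaitUntilSync).
_EXAMPLE_MIGRATION_DISK_STEPS = [
  ("ensure_secondary", "target node"),     # demote the target's disks
  ("standalone", "all nodes"),             # drop the DRBD network config
  ("reconnect", "multimaster=True"),       # dual-primary for the migration
  ("wait_sync", "all nodes"),
  # the instance itself is migrated at this point
  ("ensure_secondary", "source node"),     # demote the old primary
  ("wait_sync", "all nodes"),
  ("standalone", "all nodes"),
  ("reconnect", "multimaster=False"),      # back to single-primary
  ("wait_sync", "all nodes"),
]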
5983 def _CreateBlockDev(lu, node, instance, device, force_create,
5985 """Create a tree of block devices on a given node.
5987 If this device type has to be created on secondaries, create it and all its children.
5990 If not, just recurse to children keeping the same 'force' value.
5992 @param lu: the lu on whose behalf we execute
5993 @param node: the node on which to create the device
5994 @type instance: L{objects.Instance}
5995 @param instance: the instance which owns the device
5996 @type device: L{objects.Disk}
5997 @param device: the device to create
5998 @type force_create: boolean
5999 @param force_create: whether to force creation of this device; this
6000 will be changed to True whenever we find a device whose
6001 CreateOnSecondary() method returns True
6002 @param info: the extra 'metadata' we should attach to the device
6003 (this will be represented as a LVM tag)
6004 @type force_open: boolean
6005 @param force_open: this parameter will be passed to the
6006 L{backend.BlockdevCreate} function where it specifies
6007 whether we run on primary or not, and it affects both
6008 the child assembly and the device's own Open() execution
6011 if device.CreateOnSecondary():
6015 for child in device.children:
6016 _CreateBlockDev(lu, node, instance, child, force_create,
6019 if not force_create:
6022 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6025 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6026 """Create a single block device on a given node.
6028 This will not recurse over children of the device, so they must be created in advance.
6031 @param lu: the lu on whose behalf we execute
6032 @param node: the node on which to create the device
6033 @type instance: L{objects.Instance}
6034 @param instance: the instance which owns the device
6035 @type device: L{objects.Disk}
6036 @param device: the device to create
6037 @param info: the extra 'metadata' we should attach to the device
6038 (this will be represented as a LVM tag)
6039 @type force_open: boolean
6040 @param force_open: this parameter will be passed to the
6041 L{backend.BlockdevCreate} function where it specifies
6042 whether we run on primary or not, and it affects both
6043 the child assembly and the device's own Open() execution
6046 lu.cfg.SetDiskID(device, node)
6047 result = lu.rpc.call_blockdev_create(node, device, device.size,
6048 instance.name, force_open, info)
6049 result.Raise("Can't create block device %s on"
6050 " node %s for instance %s" % (device, node, instance.name))
6051 if device.physical_id is None:
6052 device.physical_id = result.payload
6055 def _GenerateUniqueNames(lu, exts):
6056 """Generate a suitable LV name.
6058 This will generate a logical volume name for the given instance.
6063 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6064 results.append("%s%s" % (new_id, val))
6068 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6070 """Generate a drbd8 device complete with its children.
6073 port = lu.cfg.AllocatePort()
6074 vgname = lu.cfg.GetVGName()
6075 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6076 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6077 logical_id=(vgname, names[0]))
6078 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6079 logical_id=(vgname, names[1]))
6080 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6081 logical_id=(primary, secondary, port,
6084 children=[dev_data, dev_meta],
6089 def _GenerateDiskTemplate(lu, template_name,
6090 instance_name, primary_node,
6091 secondary_nodes, disk_info,
6092 file_storage_dir, file_driver,
6094 """Generate the entire disk layout for a given template type.
6097 #TODO: compute space requirements
6099 vgname = lu.cfg.GetVGName()
6100 disk_count = len(disk_info)
6102 if template_name == constants.DT_DISKLESS:
6104 elif template_name == constants.DT_PLAIN:
6105 if len(secondary_nodes) != 0:
6106 raise errors.ProgrammerError("Wrong template configuration")
6108 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6109 for i in range(disk_count)])
6110 for idx, disk in enumerate(disk_info):
6111 disk_index = idx + base_index
6112 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6113 logical_id=(vgname, names[idx]),
6114 iv_name="disk/%d" % disk_index,
6116 disks.append(disk_dev)
6117 elif template_name == constants.DT_DRBD8:
6118 if len(secondary_nodes) != 1:
6119 raise errors.ProgrammerError("Wrong template configuration")
6120 remote_node = secondary_nodes[0]
6121 minors = lu.cfg.AllocateDRBDMinor(
6122 [primary_node, remote_node] * len(disk_info), instance_name)
6125 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6126 for i in range(disk_count)]):
6127 names.append(lv_prefix + "_data")
6128 names.append(lv_prefix + "_meta")
6129 for idx, disk in enumerate(disk_info):
6130 disk_index = idx + base_index
6131 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6132 disk["size"], names[idx*2:idx*2+2],
6133 "disk/%d" % disk_index,
6134 minors[idx*2], minors[idx*2+1])
6135 disk_dev.mode = disk["mode"]
6136 disks.append(disk_dev)
6137 elif template_name == constants.DT_FILE:
6138 if len(secondary_nodes) != 0:
6139 raise errors.ProgrammerError("Wrong template configuration")
6141 _RequireFileStorage()
6143 for idx, disk in enumerate(disk_info):
6144 disk_index = idx + base_index
6145 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6146 iv_name="disk/%d" % disk_index,
6147 logical_id=(file_driver,
6148 "%s/disk%d" % (file_storage_dir,
6151 disks.append(disk_dev)
6153 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6157 def _GetInstanceInfoText(instance):
6158 """Compute that text that should be added to the disk's metadata.
6161 return "originstname+%s" % instance.name
6164 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6165 """Create all disks for an instance.
6167 This abstracts away some work from AddInstance.
6169 @type lu: L{LogicalUnit}
6170 @param lu: the logical unit on whose behalf we execute
6171 @type instance: L{objects.Instance}
6172 @param instance: the instance whose disks we should create
6174 @param to_skip: list of indices to skip
6175 @type target_node: string
6176 @param target_node: if passed, overrides the target node for creation
6178 @return: the success of the creation
6181 info = _GetInstanceInfoText(instance)
6182 if target_node is None:
6183 pnode = instance.primary_node
6184 all_nodes = instance.all_nodes
6189 if instance.disk_template == constants.DT_FILE:
6190 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6191 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6193 result.Raise("Failed to create directory '%s' on"
6194 " node %s" % (file_storage_dir, pnode))
6196 # Note: this needs to be kept in sync with adding of disks in
6197 # LUSetInstanceParams
6198 for idx, device in enumerate(instance.disks):
6199 if to_skip and idx in to_skip:
6201 logging.info("Creating volume %s for instance %s",
6202 device.iv_name, instance.name)
6204 for node in all_nodes:
6205 f_create = node == pnode
6206 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6209 def _RemoveDisks(lu, instance, target_node=None):
6210 """Remove all disks for an instance.
6212 This abstracts away some work from `AddInstance()` and
6213 `RemoveInstance()`. Note that in case some of the devices couldn't
6214 be removed, the removal will continue with the other ones (compare
6215 with `_CreateDisks()`).
6217 @type lu: L{LogicalUnit}
6218 @param lu: the logical unit on whose behalf we execute
6219 @type instance: L{objects.Instance}
6220 @param instance: the instance whose disks we should remove
6221 @type target_node: string
6222 @param target_node: used to override the node on which to remove the disks
6224 @return: the success of the removal
6227 logging.info("Removing block devices for instance %s", instance.name)
6230 for device in instance.disks:
6232 edata = [(target_node, device)]
6234 edata = device.ComputeNodeTree(instance.primary_node)
6235 for node, disk in edata:
6236 lu.cfg.SetDiskID(disk, node)
6237 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6239 lu.LogWarning("Could not remove block device %s on node %s,"
6240 " continuing anyway: %s", device.iv_name, node, msg)
6243 if instance.disk_template == constants.DT_FILE:
6244 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6248 tgt = instance.primary_node
6249 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6251 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6252 file_storage_dir, instance.primary_node, result.fail_msg)
6258 def _ComputeDiskSize(disk_template, disks):
6259 """Compute disk size requirements in the volume group
6262 # Required free disk space as a function of disk and swap space
6264 constants.DT_DISKLESS: None,
6265 constants.DT_PLAIN: sum(d["size"] for d in disks),
6266 # 128 MB are added for drbd metadata for each disk
6267 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6268 constants.DT_FILE: None,
6271 if disk_template not in req_size_dict:
6272 raise errors.ProgrammerError("Disk template '%s' size requirement"
6273 " is unknown" % disk_template)
6275 return req_size_dict[disk_template]
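
# Illustrative sketch, not part of the original module: the arithmetic
# _ComputeDiskSize performs for a made-up disk list. Plain LVM needs the
# sum of the disk sizes, DRBD8 adds 128 MB of metadata per disk, and the
# diskless/file templates need no volume group space at all.
def _ExampleDiskSizeArithmetic():
  """Worked example for _ComputeDiskSize (illustration only)."""
  disks = [{"size": 1024}, {"size": 256}]
  plain = sum(d["size"] for d in disks)        # 1280 MB
  drbd8 = sum(d["size"] + 128 for d in disks)  # 1536 MB
  return plain, drbd8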
6278 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6279 """Hypervisor parameter validation.
6281 This function abstracts the hypervisor parameter validation to be
6282 used in both instance create and instance modify.
6284 @type lu: L{LogicalUnit}
6285 @param lu: the logical unit for which we check
6286 @type nodenames: list
6287 @param nodenames: the list of nodes on which we should check
6288 @type hvname: string
6289 @param hvname: the name of the hypervisor we should use
6290 @type hvparams: dict
6291 @param hvparams: the parameters which we need to check
6292 @raise errors.OpPrereqError: if the parameters are not valid
6295 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6298 for node in nodenames:
6302 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6305 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6306 """OS parameters validation.
6308 @type lu: L{LogicalUnit}
6309 @param lu: the logical unit for which we check
6310 @type required: boolean
6311 @param required: whether the validation should fail if the OS is not found
6313 @type nodenames: list
6314 @param nodenames: the list of nodes on which we should check
6315 @type osname: string
6316 @param osname: the name of the OS we should use
6317 @type osparams: dict
6318 @param osparams: the parameters which we need to check
6319 @raise errors.OpPrereqError: if the parameters are not valid
6322 result = lu.rpc.call_os_validate(required, nodenames, osname,
6323 [constants.OS_VALIDATE_PARAMETERS],
6325 for node, nres in result.items():
6326 # we don't check for offline cases since this should be run only
6327 # against the master node and/or an instance's nodes
6328 nres.Raise("OS Parameters validation failed on node %s" % node)
6329 if not nres.payload:
6330 lu.LogInfo("OS %s not found on node %s, validation skipped",
6334 class LUCreateInstance(LogicalUnit):
6335 """Create an instance.
6338 HPATH = "instance-add"
6339 HTYPE = constants.HTYPE_INSTANCE
6341 ("instance_name", _TNonEmptyString),
6342 ("mode", _TElemOf(constants.INSTANCE_CREATE_MODES)),
6344 ("wait_for_sync", _TBool),
6345 ("ip_check", _TBool),
6346 ("disks", _TListOf(_TDict)),
6347 ("nics", _TListOf(_TDict)),
6348 ("hvparams", _TDict),
6349 ("beparams", _TDict),
6350 ("osparams", _TDict),
6353 ("name_check", True),
6354 ("no_install", False),
6356 ("force_variant", False),
6357 ("source_handshake", None),
6358 ("source_x509_ca", None),
6359 ("source_instance_name", None),
6364 ("iallocator", None),
6365 ("hypervisor", None),
6366 ("disk_template", None),
6367 ("identify_defaults", None),
6371 def CheckArguments(self):
6375 # do not require name_check to ease forward/backward compatibility
6377 if self.op.no_install and self.op.start:
6378 self.LogInfo("No-installation mode selected, disabling startup")
6379 self.op.start = False
6380 # validate/normalize the instance name
6381 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6382 if self.op.ip_check and not self.op.name_check:
6383 # TODO: make the ip check more flexible and not depend on the name check
6384 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6387 # check nics' parameter names
6388 for nic in self.op.nics:
6389 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6391 # check disks. parameter names and consistent adopt/no-adopt strategy
6392 has_adopt = has_no_adopt = False
6393 for disk in self.op.disks:
6394 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6399 if has_adopt and has_no_adopt:
6400 raise errors.OpPrereqError("Either all disks are adopted or none is",
6403 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6404 raise errors.OpPrereqError("Disk adoption is not supported for the"
6405 " '%s' disk template" %
6406 self.op.disk_template,
6408 if self.op.iallocator is not None:
6409 raise errors.OpPrereqError("Disk adoption not allowed with an"
6410 " iallocator script", errors.ECODE_INVAL)
6411 if self.op.mode == constants.INSTANCE_IMPORT:
6412 raise errors.OpPrereqError("Disk adoption not allowed for"
6413 " instance import", errors.ECODE_INVAL)
6415 self.adopt_disks = has_adopt
6417 # instance name verification
6418 if self.op.name_check:
6419 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6420 self.op.instance_name = self.hostname1.name
6421 # used in CheckPrereq for ip ping check
6422 self.check_ip = self.hostname1.ip
6423 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6424 raise errors.OpPrereqError("Remote imports require names to be checked" %
6427 self.check_ip = None
6429 # file storage checks
6430 if (self.op.file_driver and
6431 not self.op.file_driver in constants.FILE_DRIVER):
6432 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6433 self.op.file_driver, errors.ECODE_INVAL)
6435 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6436 raise errors.OpPrereqError("File storage directory path not absolute",
6439 ### Node/iallocator related checks
6440 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6441 raise errors.OpPrereqError("One and only one of iallocator and primary"
6442 " node must be given",
6445 self._cds = _GetClusterDomainSecret()
6447 if self.op.mode == constants.INSTANCE_IMPORT:
6448 # On import force_variant must be True, because if we forced it at
6449 # initial install, our only chance when importing it back is that it
6451 self.op.force_variant = True
6453 if self.op.no_install:
6454 self.LogInfo("No-installation mode has no effect during import")
6456 elif self.op.mode == constants.INSTANCE_CREATE:
6457 if self.op.os_type is None:
6458 raise errors.OpPrereqError("No guest OS specified",
6460 if self.op.disk_template is None:
6461 raise errors.OpPrereqError("No disk template specified",
6464 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6465 # Check handshake to ensure both clusters have the same domain secret
6466 src_handshake = self.op.source_handshake
6467 if not src_handshake:
6468 raise errors.OpPrereqError("Missing source handshake",
6471 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6474 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6477 # Load and check source CA
6478 self.source_x509_ca_pem = self.op.source_x509_ca
6479 if not self.source_x509_ca_pem:
6480 raise errors.OpPrereqError("Missing source X509 CA",
6484 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6486 except OpenSSL.crypto.Error, err:
6487 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6488 (err, ), errors.ECODE_INVAL)
6490 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6491 if errcode is not None:
6492 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6495 self.source_x509_ca = cert
6497 src_instance_name = self.op.source_instance_name
6498 if not src_instance_name:
6499 raise errors.OpPrereqError("Missing source instance name",
6502 self.source_instance_name = \
6503 utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6506 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6507 self.op.mode, errors.ECODE_INVAL)
6509 def ExpandNames(self):
6510 """ExpandNames for CreateInstance.
6512 Figure out the right locks for instance creation.
6515 self.needed_locks = {}
6517 instance_name = self.op.instance_name
6518 # this is just a preventive check, but someone might still add this
6519 # instance in the meantime, and creation will fail at lock-add time
6520 if instance_name in self.cfg.GetInstanceList():
6521 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6522 instance_name, errors.ECODE_EXISTS)
6524 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6526 if self.op.iallocator:
6527 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6529 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6530 nodelist = [self.op.pnode]
6531 if self.op.snode is not None:
6532 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6533 nodelist.append(self.op.snode)
6534 self.needed_locks[locking.LEVEL_NODE] = nodelist
6536 # in case of import lock the source node too
6537 if self.op.mode == constants.INSTANCE_IMPORT:
6538 src_node = self.op.src_node
6539 src_path = self.op.src_path
6541 if src_path is None:
6542 self.op.src_path = src_path = self.op.instance_name
6544 if src_node is None:
6545 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6546 self.op.src_node = None
6547 if os.path.isabs(src_path):
6548 raise errors.OpPrereqError("Importing an instance from an absolute"
6549 " path requires a source node option.",
6552 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6553 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6554 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6555 if not os.path.isabs(src_path):
6556 self.op.src_path = src_path = \
6557 utils.PathJoin(constants.EXPORT_DIR, src_path)
6559 def _RunAllocator(self):
6560 """Run the allocator based on input opcode.
6563 nics = [n.ToDict() for n in self.nics]
6564 ial = IAllocator(self.cfg, self.rpc,
6565 mode=constants.IALLOCATOR_MODE_ALLOC,
6566 name=self.op.instance_name,
6567 disk_template=self.op.disk_template,
6570 vcpus=self.be_full[constants.BE_VCPUS],
6571 mem_size=self.be_full[constants.BE_MEMORY],
6574 hypervisor=self.op.hypervisor,
6577 ial.Run(self.op.iallocator)
6580 raise errors.OpPrereqError("Can't compute nodes using"
6581 " iallocator '%s': %s" %
6582 (self.op.iallocator, ial.info),
6584 if len(ial.result) != ial.required_nodes:
6585 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6586 " of nodes (%s), required %s" %
6587 (self.op.iallocator, len(ial.result),
6588 ial.required_nodes), errors.ECODE_FAULT)
6589 self.op.pnode = ial.result[0]
6590 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6591 self.op.instance_name, self.op.iallocator,
6592 utils.CommaJoin(ial.result))
6593 if ial.required_nodes == 2:
6594 self.op.snode = ial.result[1]
6596 def BuildHooksEnv(self):
6599 This runs on master, primary and secondary nodes of the instance.
6603 "ADD_MODE": self.op.mode,
6605 if self.op.mode == constants.INSTANCE_IMPORT:
6606 env["SRC_NODE"] = self.op.src_node
6607 env["SRC_PATH"] = self.op.src_path
6608 env["SRC_IMAGES"] = self.src_images
6610 env.update(_BuildInstanceHookEnv(
6611 name=self.op.instance_name,
6612 primary_node=self.op.pnode,
6613 secondary_nodes=self.secondaries,
6614 status=self.op.start,
6615 os_type=self.op.os_type,
6616 memory=self.be_full[constants.BE_MEMORY],
6617 vcpus=self.be_full[constants.BE_VCPUS],
6618 nics=_NICListToTuple(self, self.nics),
6619 disk_template=self.op.disk_template,
6620 disks=[(d["size"], d["mode"]) for d in self.disks],
6623 hypervisor_name=self.op.hypervisor,
6626 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6630 def _ReadExportInfo(self):
6631 """Reads the export information from disk.
6633 It will override the opcode source node and path with the actual
6634 information, if these two were not specified before.
6636 @return: the export information
6639 assert self.op.mode == constants.INSTANCE_IMPORT
6641 src_node = self.op.src_node
6642 src_path = self.op.src_path
6644 if src_node is None:
6645 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6646 exp_list = self.rpc.call_export_list(locked_nodes)
6648 for node in exp_list:
6649 if exp_list[node].fail_msg:
6651 if src_path in exp_list[node].payload:
6653 self.op.src_node = src_node = node
6654 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6658 raise errors.OpPrereqError("No export found for relative path %s" %
6659 src_path, errors.ECODE_INVAL)
6661 _CheckNodeOnline(self, src_node)
6662 result = self.rpc.call_export_info(src_node, src_path)
6663 result.Raise("No export or invalid export found in dir %s" % src_path)
6665 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6666 if not export_info.has_section(constants.INISECT_EXP):
6667 raise errors.ProgrammerError("Corrupted export config",
6668 errors.ECODE_ENVIRON)
6670 ei_version = export_info.get(constants.INISECT_EXP, "version")
6671 if (int(ei_version) != constants.EXPORT_VERSION):
6672 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6673 (ei_version, constants.EXPORT_VERSION),
6674 errors.ECODE_ENVIRON)
6677 def _ReadExportParams(self, einfo):
6678 """Use export parameters as defaults.
6680 In case the opcode doesn't specify (as in override) some instance
6681 parameters, then try to use them from the export information, if
6685 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6687 if self.op.disk_template is None:
6688 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6689 self.op.disk_template = einfo.get(constants.INISECT_INS,
6692 raise errors.OpPrereqError("No disk template specified and the export"
6693 " is missing the disk_template information",
6696 if not self.op.disks:
6697 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6699 # TODO: import the disk iv_name too
6700 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6701 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6702 disks.append({"size": disk_sz})
6703 self.op.disks = disks
6705 raise errors.OpPrereqError("No disk info specified and the export"
6706 " is missing the disk information",
6709 if (not self.op.nics and
6710 einfo.has_option(constants.INISECT_INS, "nic_count")):
6712 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6714 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6715 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6720 if (self.op.hypervisor is None and
6721 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6722 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6723 if einfo.has_section(constants.INISECT_HYP):
6724 # use the export parameters but do not override the ones
6725 # specified by the user
6726 for name, value in einfo.items(constants.INISECT_HYP):
6727 if name not in self.op.hvparams:
6728 self.op.hvparams[name] = value
6730 if einfo.has_section(constants.INISECT_BEP):
6731 # use the parameters, without overriding
6732 for name, value in einfo.items(constants.INISECT_BEP):
6733 if name not in self.op.beparams:
6734 self.op.beparams[name] = value
6736 # try to read the parameters old style, from the main section
6737 for name in constants.BES_PARAMETERS:
6738 if (name not in self.op.beparams and
6739 einfo.has_option(constants.INISECT_INS, name)):
6740 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6742 if einfo.has_section(constants.INISECT_OSP):
6743 # use the parameters, without overriding
6744 for name, value in einfo.items(constants.INISECT_OSP):
6745 if name not in self.op.osparams:
6746 self.op.osparams[name] = value
6748 def _RevertToDefaults(self, cluster):
6749 """Revert the instance parameters to the default values.
6753 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6754 for name in self.op.hvparams.keys():
6755 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6756 del self.op.hvparams[name]
6758 be_defs = cluster.SimpleFillBE({})
6759 for name in self.op.beparams.keys():
6760 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6761 del self.op.beparams[name]
6763 nic_defs = cluster.SimpleFillNIC({})
6764 for nic in self.op.nics:
6765 for name in constants.NICS_PARAMETERS:
6766 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6769 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6770 for name in self.op.osparams.keys():
6771 if name in os_defs and os_defs[name] == self.op.osparams[name]:
6772 del self.op.osparams[name]
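
# Illustrative note, not part of the original code: with identify_defaults
# set, an imported instance whose exported parameters happen to match the
# current cluster defaults ends up with no explicit override for them, so
# it keeps following the cluster defaults if those are changed later; only
# values that differ from the defaults are stored on the instance.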
6774 def CheckPrereq(self):
6775 """Check prerequisites.
6778 if self.op.mode == constants.INSTANCE_IMPORT:
6779 export_info = self._ReadExportInfo()
6780 self._ReadExportParams(export_info)
6782 _CheckDiskTemplate(self.op.disk_template)
6784 if (not self.cfg.GetVGName() and
6785 self.op.disk_template not in constants.DTS_NOT_LVM):
6786 raise errors.OpPrereqError("Cluster does not support lvm-based"
6787 " instances", errors.ECODE_STATE)
6789 if self.op.hypervisor is None:
6790 self.op.hypervisor = self.cfg.GetHypervisorType()
6792 cluster = self.cfg.GetClusterInfo()
6793 enabled_hvs = cluster.enabled_hypervisors
6794 if self.op.hypervisor not in enabled_hvs:
6795 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6796 " cluster (%s)" % (self.op.hypervisor,
6797 ",".join(enabled_hvs)),
6800 # check hypervisor parameter syntax (locally)
6801 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6802 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6804 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6805 hv_type.CheckParameterSyntax(filled_hvp)
6806 self.hv_full = filled_hvp
6807 # check that we don't specify global parameters on an instance
6808 _CheckGlobalHvParams(self.op.hvparams)
6810 # fill and remember the beparams dict
6811 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6812 self.be_full = cluster.SimpleFillBE(self.op.beparams)
6814 # build os parameters
6815 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6817 # now that hvp/bep are in final format, let's reset to defaults,
6819 if self.op.identify_defaults:
6820 self._RevertToDefaults(cluster)
6824 for idx, nic in enumerate(self.op.nics):
6825 nic_mode_req = nic.get("mode", None)
6826 nic_mode = nic_mode_req
6827 if nic_mode is None:
6828 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6830 # in routed mode, for the first nic, the default ip is 'auto'
6831 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6832 default_ip_mode = constants.VALUE_AUTO
6834 default_ip_mode = constants.VALUE_NONE
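
# Illustrative note, not part of the original code: for the first NIC of a
# routed-mode instance with no "ip" value given, the "auto" default above
# makes the address resolved during the name check (self.hostname1.ip) be
# used; all other NICs default to having no IP at all.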
6836 # ip validity checks
6837 ip = nic.get("ip", default_ip_mode)
6838 if ip is None or ip.lower() == constants.VALUE_NONE:
6840 elif ip.lower() == constants.VALUE_AUTO:
6841 if not self.op.name_check:
6842 raise errors.OpPrereqError("IP address set to auto but name checks"
6843 " have been skipped. Aborting.",
6845 nic_ip = self.hostname1.ip
6847 if not utils.IsValidIP4(ip):
6848 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6849 " like a valid IP" % ip,
6853 # TODO: check the ip address for uniqueness
6854 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6855 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6858 # MAC address verification
6859 mac = nic.get("mac", constants.VALUE_AUTO)
6860 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6861 mac = utils.NormalizeAndValidateMac(mac)
6864 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6865 except errors.ReservationError:
6866 raise errors.OpPrereqError("MAC address %s already in use"
6867 " in cluster" % mac,
6868 errors.ECODE_NOTUNIQUE)
6870 # bridge verification
6871 bridge = nic.get("bridge", None)
6872 link = nic.get("link", None)
6874 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6875 " at the same time", errors.ECODE_INVAL)
6876 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6877 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6884 nicparams[constants.NIC_MODE] = nic_mode_req
6886 nicparams[constants.NIC_LINK] = link
6888 check_params = cluster.SimpleFillNIC(nicparams)
6889 objects.NIC.CheckParameterSyntax(check_params)
6890 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6892 # disk checks/pre-build
6894 for disk in self.op.disks:
6895 mode = disk.get("mode", constants.DISK_RDWR)
6896 if mode not in constants.DISK_ACCESS_SET:
6897 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6898 mode, errors.ECODE_INVAL)
6899 size = disk.get("size", None)
6901 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6904 except (TypeError, ValueError):
6905 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6907 new_disk = {"size": size, "mode": mode}
6909 new_disk["adopt"] = disk["adopt"]
6910 self.disks.append(new_disk)
6912 if self.op.mode == constants.INSTANCE_IMPORT:
6914 # Check that the new instance doesn't have fewer disks than the export
6915 instance_disks = len(self.disks)
6916 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6917 if instance_disks < export_disks:
6918 raise errors.OpPrereqError("Not enough disks to import."
6919 " (instance: %d, export: %d)" %
6920 (instance_disks, export_disks),
6924 for idx in range(export_disks):
6925 option = 'disk%d_dump' % idx
6926 if export_info.has_option(constants.INISECT_INS, option):
6927 # FIXME: are the old os-es, disk sizes, etc. useful?
6928 export_name = export_info.get(constants.INISECT_INS, option)
6929 image = utils.PathJoin(self.op.src_path, export_name)
6930 disk_images.append(image)
6932 disk_images.append(False)
6934 self.src_images = disk_images
6936 old_name = export_info.get(constants.INISECT_INS, 'name')
6938 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6939 except (TypeError, ValueError), err:
6940 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6941 " an integer: %s" % str(err),
6943 if self.op.instance_name == old_name:
6944 for idx, nic in enumerate(self.nics):
6945 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6946 nic_mac_ini = 'nic%d_mac' % idx
6947 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6949 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6951 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6952 if self.op.ip_check:
6953 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6954 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6955 (self.check_ip, self.op.instance_name),
6956 errors.ECODE_NOTUNIQUE)
6958 #### mac address generation
6959 # By generating the MAC address here, both the allocator and the hooks get
6960 # the real final mac address rather than the 'auto' or 'generate' value.
6961 # There is a race condition between the generation and the instance object
6962 # creation, which means that we know the mac is valid now, but we're not
6963 # sure it will be when we actually add the instance. If things go bad
6964 # adding the instance will abort because of a duplicate mac, and the
6965 # creation job will fail.
6966 for nic in self.nics:
6967 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6968 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6972 if self.op.iallocator is not None:
6973 self._RunAllocator()
6975 #### node related checks
6977 # check primary node
6978 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6979 assert self.pnode is not None, \
6980 "Cannot retrieve locked node %s" % self.op.pnode
6982 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6983 pnode.name, errors.ECODE_STATE)
6985 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6986 pnode.name, errors.ECODE_STATE)
6988 self.secondaries = []
6990 # mirror node verification
6991 if self.op.disk_template in constants.DTS_NET_MIRROR:
6992 if self.op.snode is None:
6993 raise errors.OpPrereqError("The networked disk templates need"
6994 " a mirror node", errors.ECODE_INVAL)
6995 if self.op.snode == pnode.name:
6996 raise errors.OpPrereqError("The secondary node cannot be the"
6997 " primary node.", errors.ECODE_INVAL)
6998 _CheckNodeOnline(self, self.op.snode)
6999 _CheckNodeNotDrained(self, self.op.snode)
7000 self.secondaries.append(self.op.snode)
7002 nodenames = [pnode.name] + self.secondaries
7004 req_size = _ComputeDiskSize(self.op.disk_template,
7007 # Check lv size requirements, if not adopting
7008 if req_size is not None and not self.adopt_disks:
7009 _CheckNodesFreeDisk(self, nodenames, req_size)
7011 if self.adopt_disks: # instead, we must check the adoption data
7012 all_lvs = set([i["adopt"] for i in self.disks])
7013 if len(all_lvs) != len(self.disks):
7014 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7016 for lv_name in all_lvs:
7018 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7019 except errors.ReservationError:
7020 raise errors.OpPrereqError("LV named %s used by another instance" %
7021 lv_name, errors.ECODE_NOTUNIQUE)
7023 node_lvs = self.rpc.call_lv_list([pnode.name],
7024 self.cfg.GetVGName())[pnode.name]
7025 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7026 node_lvs = node_lvs.payload
7027 delta = all_lvs.difference(node_lvs.keys())
7029 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7030 utils.CommaJoin(delta),
7032 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7034 raise errors.OpPrereqError("Online logical volumes found, cannot"
7035 " adopt: %s" % utils.CommaJoin(online_lvs),
7037 # update the size of disk based on what is found
7038 for dsk in self.disks:
7039 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7041 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7043 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7044 # check OS parameters (remotely)
7045 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7047 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7049 # memory check on primary node
7051 _CheckNodeFreeMemory(self, self.pnode.name,
7052 "creating instance %s" % self.op.instance_name,
7053 self.be_full[constants.BE_MEMORY],
7056 self.dry_run_result = list(nodenames)
7058 def Exec(self, feedback_fn):
7059 """Create and add the instance to the cluster.
7062 instance = self.op.instance_name
7063 pnode_name = self.pnode.name
7065 ht_kind = self.op.hypervisor
7066 if ht_kind in constants.HTS_REQ_PORT:
7067 network_port = self.cfg.AllocatePort()
7071 if constants.ENABLE_FILE_STORAGE:
7072 # this is needed because os.path.join does not accept None arguments
7073 if self.op.file_storage_dir is None:
7074 string_file_storage_dir = ""
7076 string_file_storage_dir = self.op.file_storage_dir
7078 # build the full file storage dir path
7079 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7080 string_file_storage_dir, instance)
7082 file_storage_dir = ""
7084 disks = _GenerateDiskTemplate(self,
7085 self.op.disk_template,
7086 instance, pnode_name,
7090 self.op.file_driver,
7093 iobj = objects.Instance(name=instance, os=self.op.os_type,
7094 primary_node=pnode_name,
7095 nics=self.nics, disks=disks,
7096 disk_template=self.op.disk_template,
7098 network_port=network_port,
7099 beparams=self.op.beparams,
7100 hvparams=self.op.hvparams,
7101 hypervisor=self.op.hypervisor,
7102 osparams=self.op.osparams,
7105 if self.adopt_disks:
7106 # rename LVs to the newly-generated names; we need to construct
7107 # 'fake' LV disks with the old data, plus the new unique_id
7108 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7110 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7111 rename_to.append(t_dsk.logical_id)
7112 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7113 self.cfg.SetDiskID(t_dsk, pnode_name)
7114 result = self.rpc.call_blockdev_rename(pnode_name,
7115 zip(tmp_disks, rename_to))
7116 result.Raise("Failed to rename adoped LVs")
7118 feedback_fn("* creating instance disks...")
7120 _CreateDisks(self, iobj)
7121 except errors.OpExecError:
7122 self.LogWarning("Device creation failed, reverting...")
7124 _RemoveDisks(self, iobj)
7126 self.cfg.ReleaseDRBDMinors(instance)
7129 feedback_fn("adding instance %s to cluster config" % instance)
7131 self.cfg.AddInstance(iobj, self.proc.GetECId())
7133 # Declare that we don't want to remove the instance lock anymore, as we've
7134 # added the instance to the config
7135 del self.remove_locks[locking.LEVEL_INSTANCE]
7136 # Unlock all the nodes
7137 if self.op.mode == constants.INSTANCE_IMPORT:
7138 nodes_keep = [self.op.src_node]
7139 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7140 if node != self.op.src_node]
7141 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7142 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7144 self.context.glm.release(locking.LEVEL_NODE)
7145 del self.acquired_locks[locking.LEVEL_NODE]
7147 if self.op.wait_for_sync:
7148 disk_abort = not _WaitForSync(self, iobj)
7149 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7150 # make sure the disks are not degraded (still sync-ing is ok)
7152 feedback_fn("* checking mirrors status")
7153 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7158 _RemoveDisks(self, iobj)
7159 self.cfg.RemoveInstance(iobj.name)
7160 # Make sure the instance lock gets removed
7161 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7162 raise errors.OpExecError("There are some degraded disks for"
7165 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7166 if self.op.mode == constants.INSTANCE_CREATE:
7167 if not self.op.no_install:
7168 feedback_fn("* running the instance OS create scripts...")
7169 # FIXME: pass debug option from opcode to backend
7170 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7171 self.op.debug_level)
7172 result.Raise("Could not add os for instance %s"
7173 " on node %s" % (instance, pnode_name))
7175 elif self.op.mode == constants.INSTANCE_IMPORT:
7176 feedback_fn("* running the instance OS import scripts...")
7180 for idx, image in enumerate(self.src_images):
7184 # FIXME: pass debug option from opcode to backend
7185 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7186 constants.IEIO_FILE, (image, ),
7187 constants.IEIO_SCRIPT,
7188 (iobj.disks[idx], idx),
7190 transfers.append(dt)
7193 masterd.instance.TransferInstanceData(self, feedback_fn,
7194 self.op.src_node, pnode_name,
7195 self.pnode.secondary_ip,
7197 if not compat.all(import_result):
7198 self.LogWarning("Some disks for instance %s on node %s were not"
7199 " imported successfully" % (instance, pnode_name))
7201 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7202 feedback_fn("* preparing remote import...")
7203 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7204 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7206 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7207 self.source_x509_ca,
7208 self._cds, timeouts)
7209 if not compat.all(disk_results):
7210 # TODO: Should the instance still be started, even if some disks
7211 # failed to import (valid for local imports, too)?
7212 self.LogWarning("Some disks for instance %s on node %s were not"
7213 " imported successfully" % (instance, pnode_name))
7215 # Run rename script on newly imported instance
7216 assert iobj.name == instance
7217 feedback_fn("Running rename script for %s" % instance)
7218 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7219 self.source_instance_name,
7220 self.op.debug_level)
7222 self.LogWarning("Failed to run rename script for %s on node"
7223 " %s: %s" % (instance, pnode_name, result.fail_msg))
7226 # also checked in the prereq part
7227 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7231 iobj.admin_up = True
7232 self.cfg.Update(iobj, feedback_fn)
7233 logging.info("Starting instance %s on node %s", instance, pnode_name)
7234 feedback_fn("* starting instance...")
7235 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7236 result.Raise("Could not start instance")
7238 return list(iobj.all_nodes)
7241 class LUConnectConsole(NoHooksLU):
7242 """Connect to an instance's console.
7244 This is somewhat special in that it returns the command line that
7245 you need to run on the master node in order to connect to the console.
7249 _OP_REQP = [("instance_name", _TNonEmptyString)]
7252 def ExpandNames(self):
7253 self._ExpandAndLockInstance()
7255 def CheckPrereq(self):
7256 """Check prerequisites.
7258 This checks that the instance is in the cluster.
7261 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7262 assert self.instance is not None, \
7263 "Cannot retrieve locked instance %s" % self.op.instance_name
7264 _CheckNodeOnline(self, self.instance.primary_node)
7266 def Exec(self, feedback_fn):
7267 """Connect to the console of an instance
7270 instance = self.instance
7271 node = instance.primary_node
7273 node_insts = self.rpc.call_instance_list([node],
7274 [instance.hypervisor])[node]
7275 node_insts.Raise("Can't get node information from %s" % node)
7277 if instance.name not in node_insts.payload:
7278 raise errors.OpExecError("Instance %s is not running." % instance.name)
7280 logging.debug("Connecting to console of %s on %s", instance.name, node)
7282 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7283 cluster = self.cfg.GetClusterInfo()
7284 # beparams and hvparams are passed separately, to avoid editing the
7285 # instance and then saving the defaults in the instance itself.
7286 hvparams = cluster.FillHV(instance)
7287 beparams = cluster.FillBE(instance)
7288 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7291 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7294 class LUReplaceDisks(LogicalUnit):
7295 """Replace the disks of an instance.
7298 HPATH = "mirrors-replace"
7299 HTYPE = constants.HTYPE_INSTANCE
7301 ("instance_name", _TNonEmptyString),
7302 ("mode", _TElemOf(constants.REPLACE_MODES)),
7303 ("disks", _TListOf(_TPositiveInt)),
7306 ("remote_node", None),
7307 ("iallocator", None),
7308 ("early_release", None),
7312 def CheckArguments(self):
7313 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7316 def ExpandNames(self):
7317 self._ExpandAndLockInstance()
7319 if self.op.iallocator is not None:
7320 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7322 elif self.op.remote_node is not None:
7323 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7324 self.op.remote_node = remote_node
7326 # Warning: do not remove the locking of the new secondary here
7327 # unless DRBD8.AddChildren is changed to work in parallel;
7328 # currently it doesn't since parallel invocations of
7329 # FindUnusedMinor will conflict
7330 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7331 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7334 self.needed_locks[locking.LEVEL_NODE] = []
7335 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7337 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7338 self.op.iallocator, self.op.remote_node,
7339 self.op.disks, False, self.op.early_release)
7341 self.tasklets = [self.replacer]
7343 def DeclareLocks(self, level):
7344 # If we're not already locking all nodes in the set we have to declare the
7345 # instance's primary/secondary nodes.
7346 if (level == locking.LEVEL_NODE and
7347 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7348 self._LockInstancesNodes()
7350 def BuildHooksEnv(self):
7353 This runs on the master, the primary and all the secondaries.
7356 instance = self.replacer.instance
7358 "MODE": self.op.mode,
7359 "NEW_SECONDARY": self.op.remote_node,
7360 "OLD_SECONDARY": instance.secondary_nodes[0],
7362 env.update(_BuildInstanceHookEnvByObject(self, instance))
7364 self.cfg.GetMasterNode(),
7365 instance.primary_node,
7367 if self.op.remote_node is not None:
7368 nl.append(self.op.remote_node)
7372 class TLReplaceDisks(Tasklet):
7373 """Replaces disks for an instance.
7375 Note: Locking is not within the scope of this class.
7378 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7379 disks, delay_iallocator, early_release):
7380 """Initializes this class.
7383 Tasklet.__init__(self, lu)
7386 self.instance_name = instance_name
7388 self.iallocator_name = iallocator_name
7389 self.remote_node = remote_node
7391 self.delay_iallocator = delay_iallocator
7392 self.early_release = early_release
7395 self.instance = None
7396 self.new_node = None
7397 self.target_node = None
7398 self.other_node = None
7399 self.remote_node_info = None
7400 self.node_secondary_ip = None
7403 def CheckArguments(mode, remote_node, iallocator):
7404 """Helper function for users of this class.
7407 # check for valid parameter combination
7408 if mode == constants.REPLACE_DISK_CHG:
7409 if remote_node is None and iallocator is None:
7410 raise errors.OpPrereqError("When changing the secondary either an"
7411 " iallocator script must be used or the"
7412 " new node given", errors.ECODE_INVAL)
7414 if remote_node is not None and iallocator is not None:
7415 raise errors.OpPrereqError("Give either the iallocator or the new"
7416 " secondary, not both", errors.ECODE_INVAL)
7418 elif remote_node is not None or iallocator is not None:
7419 # Not replacing the secondary
7420 raise errors.OpPrereqError("The iallocator and new node options can"
7421 " only be used when changing the"
7422 " secondary node", errors.ECODE_INVAL)
7425 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7426 """Compute a new secondary node using an IAllocator.
7429 ial = IAllocator(lu.cfg, lu.rpc,
7430 mode=constants.IALLOCATOR_MODE_RELOC,
7432 relocate_from=relocate_from)
7434 ial.Run(iallocator_name)
7437 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7438 " %s" % (iallocator_name, ial.info),
7441 if len(ial.result) != ial.required_nodes:
7442 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7443 " of nodes (%s), required %s" %
7445 len(ial.result), ial.required_nodes),
7448 remote_node_name = ial.result[0]
7450 lu.LogInfo("Selected new secondary for instance '%s': %s",
7451 instance_name, remote_node_name)
7453 return remote_node_name
7455 def _FindFaultyDisks(self, node_name):
7456 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7459 def CheckPrereq(self):
7460 """Check prerequisites.
7462 This checks that the instance is in the cluster.
7465 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7466 assert instance is not None, \
7467 "Cannot retrieve locked instance %s" % self.instance_name
7469 if instance.disk_template != constants.DT_DRBD8:
7470 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7471 " instances", errors.ECODE_INVAL)
7473 if len(instance.secondary_nodes) != 1:
7474 raise errors.OpPrereqError("The instance has a strange layout,"
7475 " expected one secondary but found %d" %
7476 len(instance.secondary_nodes),
7479 if not self.delay_iallocator:
7480 self._CheckPrereq2()
7482 def _CheckPrereq2(self):
7483 """Check prerequisites, second part.
7485 This function should always be part of CheckPrereq. It was separated and is
7486 now called from Exec because during node evacuation iallocator was only
7487 called with an unmodified cluster model, not taking planned changes into account.
7491 instance = self.instance
7492 secondary_node = instance.secondary_nodes[0]
7494 if self.iallocator_name is None:
7495 remote_node = self.remote_node
7497 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7498 instance.name, instance.secondary_nodes)
7500 if remote_node is not None:
7501 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7502 assert self.remote_node_info is not None, \
7503 "Cannot retrieve locked node %s" % remote_node
7505 self.remote_node_info = None
7507 if remote_node == self.instance.primary_node:
7508 raise errors.OpPrereqError("The specified node is the primary node of"
7509 " the instance.", errors.ECODE_INVAL)
7511 if remote_node == secondary_node:
7512 raise errors.OpPrereqError("The specified node is already the"
7513 " secondary node of the instance.",
7516 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7517 constants.REPLACE_DISK_CHG):
7518 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7521 if self.mode == constants.REPLACE_DISK_AUTO:
7522 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7523 faulty_secondary = self._FindFaultyDisks(secondary_node)
7525 if faulty_primary and faulty_secondary:
7526 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7527 " one node and can not be repaired"
7528 " automatically" % self.instance_name,
7532 self.disks = faulty_primary
7533 self.target_node = instance.primary_node
7534 self.other_node = secondary_node
7535 check_nodes = [self.target_node, self.other_node]
7536 elif faulty_secondary:
7537 self.disks = faulty_secondary
7538 self.target_node = secondary_node
7539 self.other_node = instance.primary_node
7540 check_nodes = [self.target_node, self.other_node]
7546 # Non-automatic modes
7547 if self.mode == constants.REPLACE_DISK_PRI:
7548 self.target_node = instance.primary_node
7549 self.other_node = secondary_node
7550 check_nodes = [self.target_node, self.other_node]
7552 elif self.mode == constants.REPLACE_DISK_SEC:
7553 self.target_node = secondary_node
7554 self.other_node = instance.primary_node
7555 check_nodes = [self.target_node, self.other_node]
7557 elif self.mode == constants.REPLACE_DISK_CHG:
7558 self.new_node = remote_node
7559 self.other_node = instance.primary_node
7560 self.target_node = secondary_node
7561 check_nodes = [self.new_node, self.other_node]
7563 _CheckNodeNotDrained(self.lu, remote_node)
7565 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7566 assert old_node_info is not None
7567 if old_node_info.offline and not self.early_release:
7568 # doesn't make sense to delay the release
7569 self.early_release = True
7570 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7571 " early-release mode", secondary_node)
7574 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7577 # If not specified all disks should be replaced
7579 self.disks = range(len(self.instance.disks))
7581 for node in check_nodes:
7582 _CheckNodeOnline(self.lu, node)
7584 # Check whether disks are valid
7585 for disk_idx in self.disks:
7586 instance.FindDisk(disk_idx)
7588 # Get secondary node IP addresses
7591 for node_name in [self.target_node, self.other_node, self.new_node]:
7592 if node_name is not None:
7593 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7595 self.node_secondary_ip = node_2nd_ip
7597 def Exec(self, feedback_fn):
7598 """Execute disk replacement.
7600 This dispatches the disk replacement to the appropriate handler.
7603 if self.delay_iallocator:
7604 self._CheckPrereq2()
7607 feedback_fn("No disks need replacement")
7610 feedback_fn("Replacing disk(s) %s for %s" %
7611 (utils.CommaJoin(self.disks), self.instance.name))
7613 activate_disks = (not self.instance.admin_up)
7615 # Activate the instance disks if we're replacing them on a down instance
7617 _StartInstanceDisks(self.lu, self.instance, True)
7620 # Should we replace the secondary node?
7621 if self.new_node is not None:
7622 fn = self._ExecDrbd8Secondary
7624 fn = self._ExecDrbd8DiskOnly
7626 return fn(feedback_fn)
7629 # Deactivate the instance disks if we're replacing them on a
7632 _SafeShutdownInstanceDisks(self.lu, self.instance)
7634 def _CheckVolumeGroup(self, nodes):
7635 self.lu.LogInfo("Checking volume groups")
7637 vgname = self.cfg.GetVGName()
7639 # Make sure volume group exists on all involved nodes
7640 results = self.rpc.call_vg_list(nodes)
7642 raise errors.OpExecError("Can't list volume groups on the nodes")
7646 res.Raise("Error checking node %s" % node)
7647 if vgname not in res.payload:
7648 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7651 def _CheckDisksExistence(self, nodes):
7652 # Check disk existence
7653 for idx, dev in enumerate(self.instance.disks):
7654 if idx not in self.disks:
7658 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7659 self.cfg.SetDiskID(dev, node)
7661 result = self.rpc.call_blockdev_find(node, dev)
7663 msg = result.fail_msg
7664 if msg or not result.payload:
7666 msg = "disk not found"
7667 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7670 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7671 for idx, dev in enumerate(self.instance.disks):
7672 if idx not in self.disks:
7675 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7678 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7680 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7681 " replace disks for instance %s" %
7682 (node_name, self.instance.name))
7684 def _CreateNewStorage(self, node_name):
7685 vgname = self.cfg.GetVGName()
7688 for idx, dev in enumerate(self.instance.disks):
7689 if idx not in self.disks:
7692 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7694 self.cfg.SetDiskID(dev, node_name)
7696 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
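# illustrative: for idx == 0 this yields [".disk0_data", ".disk0_meta"];
# _GenerateUniqueNames is expected to prefix each with a freshly generated
# unique ID before the LVs are defined below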
7697 names = _GenerateUniqueNames(self.lu, lv_names)
7699 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7700 logical_id=(vgname, names[0]))
7701 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7702 logical_id=(vgname, names[1]))
7704 new_lvs = [lv_data, lv_meta]
7705 old_lvs = dev.children
7706 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
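# iv_names now maps the device name (e.g. "disk/0") to a tuple of
# (drbd device, its current LV children, the LVs just defined above);
# _CheckDevices and _RemoveOldStorage later consume this mapping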
7708 # we pass force_create=True to force the LVM creation
7709 for new_lv in new_lvs:
7710 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7711 _GetInstanceInfoText(self.instance), False)
7715 def _CheckDevices(self, node_name, iv_names):
7716 for name, (dev, _, _) in iv_names.iteritems():
7717 self.cfg.SetDiskID(dev, node_name)
7719 result = self.rpc.call_blockdev_find(node_name, dev)
7721 msg = result.fail_msg
7722 if msg or not result.payload:
7724 msg = "disk not found"
7725 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7728 if result.payload.is_degraded:
7729 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7731 def _RemoveOldStorage(self, node_name, iv_names):
7732 for name, (_, old_lvs, _) in iv_names.iteritems():
7733 self.lu.LogInfo("Remove logical volumes for %s" % name)
7736 self.cfg.SetDiskID(lv, node_name)
7738 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7740 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7741 hint="remove unused LVs manually")
7743 def _ReleaseNodeLock(self, node_name):
7744 """Releases the lock for a given node."""
7745 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7747 def _ExecDrbd8DiskOnly(self, feedback_fn):
7748 """Replace a disk on the primary or secondary for DRBD 8.
7750 The algorithm for replace is quite complicated:
7752 1. for each disk to be replaced:
7754 1. create new LVs on the target node with unique names
7755 1. detach old LVs from the drbd device
7756 1. rename old LVs to name_replaced.<time_t>
7757 1. rename new LVs to old LVs
7758 1. attach the new LVs (with the old names now) to the drbd device
7760 1. wait for sync across all devices
7762 1. for each modified disk:
7764 1. remove old LVs (which have the name name_replaced.<time_t>)
7766 Failures are not very well handled.
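As an illustrative walk-through (names hypothetical): for disk/0 the new
LVs are created under fresh unique names, the drbd device drops its
current LV children, those old LVs are renamed to <name>_replaced-<time_t>,
the new LVs are renamed onto the old names and re-attached, and once the
device has resynced the *_replaced LVs are deleted.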
7771 # Step: check device activation
7772 self.lu.LogStep(1, steps_total, "Check device existence")
7773 self._CheckDisksExistence([self.other_node, self.target_node])
7774 self._CheckVolumeGroup([self.target_node, self.other_node])
7776 # Step: check other node consistency
7777 self.lu.LogStep(2, steps_total, "Check peer consistency")
7778 self._CheckDisksConsistency(self.other_node,
7779 self.other_node == self.instance.primary_node,
7782 # Step: create new storage
7783 self.lu.LogStep(3, steps_total, "Allocate new storage")
7784 iv_names = self._CreateNewStorage(self.target_node)
7786 # Step: for each lv, detach+rename*2+attach
7787 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7788 for dev, old_lvs, new_lvs in iv_names.itervalues():
7789 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7791 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7793 result.Raise("Can't detach drbd from local storage on node"
7794 " %s for device %s" % (self.target_node, dev.iv_name))
7796 #cfg.Update(instance)
7798 # ok, we created the new LVs, so now we know we have the needed
7799 # storage; as such, we proceed on the target node to rename
7800 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7801 # using the assumption that logical_id == physical_id (which in
7802 # turn is the unique_id on that node)
7804 # FIXME(iustin): use a better name for the replaced LVs
7805 temp_suffix = int(time.time())
7806 ren_fn = lambda d, suff: (d.physical_id[0],
7807 d.physical_id[1] + "_replaced-%s" % suff)
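# illustrative: a physical_id of ("xenvg", "disk0_data") becomes
# ("xenvg", "disk0_data_replaced-<time_t>"); the names are hypothetical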
7809 # Build the rename list based on what LVs exist on the node
7810 rename_old_to_new = []
7811 for to_ren in old_lvs:
7812 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7813 if not result.fail_msg and result.payload:
7815 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7817 self.lu.LogInfo("Renaming the old LVs on the target node")
7818 result = self.rpc.call_blockdev_rename(self.target_node,
7820 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7822 # Now we rename the new LVs to the old LVs
7823 self.lu.LogInfo("Renaming the new LVs on the target node")
7824 rename_new_to_old = [(new, old.physical_id)
7825 for old, new in zip(old_lvs, new_lvs)]
7826 result = self.rpc.call_blockdev_rename(self.target_node,
7828 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7830 for old, new in zip(old_lvs, new_lvs):
7831 new.logical_id = old.logical_id
7832 self.cfg.SetDiskID(new, self.target_node)
7834 for disk in old_lvs:
7835 disk.logical_id = ren_fn(disk, temp_suffix)
7836 self.cfg.SetDiskID(disk, self.target_node)
7838 # Now that the new lvs have the old name, we can add them to the device
7839 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7840 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7842 msg = result.fail_msg
7844 for new_lv in new_lvs:
7845 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7848 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7849 hint=("cleanup manually the unused logical"
7851 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7853 dev.children = new_lvs
7855 self.cfg.Update(self.instance, feedback_fn)
7858 if self.early_release:
7859 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7861 self._RemoveOldStorage(self.target_node, iv_names)
7862 # WARNING: we release both node locks here, do not do other RPCs
7863 # than WaitForSync to the primary node
7864 self._ReleaseNodeLock([self.target_node, self.other_node])
7867 # This can fail as the old devices are degraded and _WaitForSync
7868 # does a combined result over all disks, so we don't check its return value
7869 self.lu.LogStep(cstep, steps_total, "Sync devices")
7871 _WaitForSync(self.lu, self.instance)
7873 # Check all devices manually
7874 self._CheckDevices(self.instance.primary_node, iv_names)
7876 # Step: remove old storage
7877 if not self.early_release:
7878 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7880 self._RemoveOldStorage(self.target_node, iv_names)
7882 def _ExecDrbd8Secondary(self, feedback_fn):
7883 """Replace the secondary node for DRBD 8.
7885 The algorithm for replace is quite complicated:
7886 - for all disks of the instance:
7887 - create new LVs on the new node with same names
7888 - shutdown the drbd device on the old secondary
7889 - disconnect the drbd network on the primary
7890 - create the drbd device on the new secondary
7891 - network attach the drbd on the primary, using an artifice:
7892 the drbd code for Attach() will connect to the network if it
7893 finds a device which is connected to the good local disks but not network enabled
7895 - wait for sync across all devices
7896 - remove all disks from the old secondary
7898 Failures are not very well handled.
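For example (hypothetical nodes): with primary A and old secondary B,
choosing new node C creates the volumes and drbd devices on C, detaches
the drbds on A from the network, points them at C, and finally removes
the now-unused volumes on B.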
7903 # Step: check device activation
7904 self.lu.LogStep(1, steps_total, "Check device existence")
7905 self._CheckDisksExistence([self.instance.primary_node])
7906 self._CheckVolumeGroup([self.instance.primary_node])
7908 # Step: check other node consistency
7909 self.lu.LogStep(2, steps_total, "Check peer consistency")
7910 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7912 # Step: create new storage
7913 self.lu.LogStep(3, steps_total, "Allocate new storage")
7914 for idx, dev in enumerate(self.instance.disks):
7915 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7916 (self.new_node, idx))
7917 # we pass force_create=True to force LVM creation
7918 for new_lv in dev.children:
7919 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7920 _GetInstanceInfoText(self.instance), False)
7922 # Step 4: drbd minors and drbd setup changes
7923 # after this, we must manually remove the drbd minors on both the
7924 # error and the success paths
7925 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7926 minors = self.cfg.AllocateDRBDMinor([self.new_node
7927 for dev in self.instance.disks],
7929 logging.debug("Allocated minors %r", minors)
7932 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7933 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7934 (self.new_node, idx))
7935 # create new devices on new_node; note that we create two IDs:
7936 # one without port, so the drbd will be activated without
7937 # networking information on the new node at this stage, and one
7938 # with network, for the later activation in step 4
7939 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7940 if self.instance.primary_node == o_node1:
7943 assert self.instance.primary_node == o_node2, "Three-node instance?"
7946 new_alone_id = (self.instance.primary_node, self.new_node, None,
7947 p_minor, new_minor, o_secret)
7948 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7949 p_minor, new_minor, o_secret)
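# note on layout: a DRBD8 logical_id, as unpacked above, is the tuple
# (node_a, node_b, port, minor_a, minor_b, secret); new_alone_id carries
# port=None so the device is first brought up without networking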
7951 iv_names[idx] = (dev, dev.children, new_net_id)
7952 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7954 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7955 logical_id=new_alone_id,
7956 children=dev.children,
7959 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7960 _GetInstanceInfoText(self.instance), False)
7961 except errors.GenericError:
7962 self.cfg.ReleaseDRBDMinors(self.instance.name)
7965 # We have new devices, shutdown the drbd on the old secondary
7966 for idx, dev in enumerate(self.instance.disks):
7967 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7968 self.cfg.SetDiskID(dev, self.target_node)
7969 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7971 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7972 "node: %s" % (idx, msg),
7973 hint=("Please cleanup this device manually as"
7974 " soon as possible"))
7976 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7977 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7978 self.node_secondary_ip,
7979 self.instance.disks)\
7980 [self.instance.primary_node]
7982 msg = result.fail_msg
7984 # detaches didn't succeed (unlikely)
7985 self.cfg.ReleaseDRBDMinors(self.instance.name)
7986 raise errors.OpExecError("Can't detach the disks from the network on"
7987 " old node: %s" % (msg,))
7989 # if we managed to detach at least one, we update all the disks of
7990 # the instance to point to the new secondary
7991 self.lu.LogInfo("Updating instance configuration")
7992 for dev, _, new_logical_id in iv_names.itervalues():
7993 dev.logical_id = new_logical_id
7994 self.cfg.SetDiskID(dev, self.instance.primary_node)
7996 self.cfg.Update(self.instance, feedback_fn)
7998 # and now perform the drbd attach
7999 self.lu.LogInfo("Attaching primary drbds to new secondary"
8000 " (standalone => connected)")
8001 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8003 self.node_secondary_ip,
8004 self.instance.disks,
8007 for to_node, to_result in result.items():
8008 msg = to_result.fail_msg
8010 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8012 hint=("please do a gnt-instance info to see the"
8013 " status of disks"))
8015 if self.early_release:
8016 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8018 self._RemoveOldStorage(self.target_node, iv_names)
8019 # WARNING: we release all node locks here, do not do other RPCs
8020 # than WaitForSync to the primary node
8021 self._ReleaseNodeLock([self.instance.primary_node,
8026 # This can fail as the old devices are degraded and _WaitForSync
8027 # does a combined result over all disks, so we don't check its return value
8028 self.lu.LogStep(cstep, steps_total, "Sync devices")
8030 _WaitForSync(self.lu, self.instance)
8032 # Check all devices manually
8033 self._CheckDevices(self.instance.primary_node, iv_names)
8035 # Step: remove old storage
8036 if not self.early_release:
8037 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8038 self._RemoveOldStorage(self.target_node, iv_names)
8041 class LURepairNodeStorage(NoHooksLU):
8042 """Repairs the volume group on a node.
8045 _OP_REQP = [("node_name", _TNonEmptyString)]
8048 def CheckArguments(self):
8049 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8051 _CheckStorageType(self.op.storage_type)
8053 storage_type = self.op.storage_type
8055 if (constants.SO_FIX_CONSISTENCY not in
8056 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8057 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8058 " repaired" % storage_type,
8061 def ExpandNames(self):
8062 self.needed_locks = {
8063 locking.LEVEL_NODE: [self.op.node_name],
8066 def _CheckFaultyDisks(self, instance, node_name):
8067 """Ensure faulty disks abort the opcode or at least warn."""
8069 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8071 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8072 " node '%s'" % (instance.name, node_name),
8074 except errors.OpPrereqError, err:
8075 if self.op.ignore_consistency:
8076 self.proc.LogWarning(str(err.args[0]))
8080 def CheckPrereq(self):
8081 """Check prerequisites.
8084 # Check whether any instance on this node has faulty disks
8085 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8086 if not inst.admin_up:
8088 check_nodes = set(inst.all_nodes)
8089 check_nodes.discard(self.op.node_name)
8090 for inst_node_name in check_nodes:
8091 self._CheckFaultyDisks(inst, inst_node_name)
8093 def Exec(self, feedback_fn):
8094 feedback_fn("Repairing storage unit '%s' on %s ..." %
8095 (self.op.name, self.op.node_name))
8097 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8098 result = self.rpc.call_storage_execute(self.op.node_name,
8099 self.op.storage_type, st_args,
8101 constants.SO_FIX_CONSISTENCY)
8102 result.Raise("Failed to repair storage unit '%s' on %s" %
8103 (self.op.name, self.op.node_name))
8106 class LUNodeEvacuationStrategy(NoHooksLU):
8107 """Computes the node evacuation strategy.
8110 _OP_REQP = [("nodes", _TListOf(_TNonEmptyString))]
8112 ("remote_node", None),
8113 ("iallocator", None),
8117 def CheckArguments(self):
8118 if self.op.remote_node is not None and self.op.iallocator is not None:
8119 raise errors.OpPrereqError("Give either the iallocator or the new"
8120 " secondary, not both", errors.ECODE_INVAL)
8122 def ExpandNames(self):
8123 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8124 self.needed_locks = locks = {}
8125 if self.op.remote_node is None:
8126 locks[locking.LEVEL_NODE] = locking.ALL_SET
8128 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8129 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8131 def Exec(self, feedback_fn):
8132 if self.op.remote_node is not None:
8134 for node in self.op.nodes:
8135 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8138 if i.primary_node == self.op.remote_node:
8139 raise errors.OpPrereqError("Node %s is the primary node of"
8140 " instance %s, cannot use it as"
8142 (self.op.remote_node, i.name),
8144 result.append([i.name, self.op.remote_node])
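# each entry pairs an instance with its new secondary, e.g.
# ["inst1.example.com", "node3.example.com"] (illustrative names)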
8146 ial = IAllocator(self.cfg, self.rpc,
8147 mode=constants.IALLOCATOR_MODE_MEVAC,
8148 evac_nodes=self.op.nodes)
8149 ial.Run(self.op.iallocator, validate=True)
8151 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8157 class LUGrowDisk(LogicalUnit):
8158 """Grow a disk of an instance.
8162 HTYPE = constants.HTYPE_INSTANCE
8164 ("instance_name", _TNonEmptyString),
8167 ("wait_for_sync", _TBool),
8171 def ExpandNames(self):
8172 self._ExpandAndLockInstance()
8173 self.needed_locks[locking.LEVEL_NODE] = []
8174 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8176 def DeclareLocks(self, level):
8177 if level == locking.LEVEL_NODE:
8178 self._LockInstancesNodes()
8180 def BuildHooksEnv(self):
8183 This runs on the master, the primary and all the secondaries.
8187 "DISK": self.op.disk,
8188 "AMOUNT": self.op.amount,
8190 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8191 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8194 def CheckPrereq(self):
8195 """Check prerequisites.
8197 This checks that the instance is in the cluster.
8200 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8201 assert instance is not None, \
8202 "Cannot retrieve locked instance %s" % self.op.instance_name
8203 nodenames = list(instance.all_nodes)
8204 for node in nodenames:
8205 _CheckNodeOnline(self, node)
8207 self.instance = instance
8209 if instance.disk_template not in constants.DTS_GROWABLE:
8210 raise errors.OpPrereqError("Instance's disk layout does not support"
8211 " growing.", errors.ECODE_INVAL)
8213 self.disk = instance.FindDisk(self.op.disk)
8215 if instance.disk_template != constants.DT_FILE:
8216 # TODO: check the free disk space for file, when that feature will be implemented
8218 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8220 def Exec(self, feedback_fn):
8221 """Execute disk grow.
8224 instance = self.instance
8227 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8229 raise errors.OpExecError("Cannot activate block device to grow")
8231 for node in instance.all_nodes:
8232 self.cfg.SetDiskID(disk, node)
8233 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8234 result.Raise("Grow request failed to node %s" % node)
8236 # TODO: Rewrite code to work properly
8237 # DRBD goes into sync mode for a short amount of time after executing the
8238 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8239 # calling "resize" in sync mode fails. Sleeping for a short amount of
8240 # time is a work-around.
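# (a short pause, e.g. time.sleep(5), serves as that work-around; the
# exact delay shown here is illustrative)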
8243 disk.RecordGrow(self.op.amount)
8244 self.cfg.Update(instance, feedback_fn)
8245 if self.op.wait_for_sync:
8246 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8248 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8249 " status.\nPlease check the instance.")
8250 if not instance.admin_up:
8251 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8252 elif not instance.admin_up:
8253 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8254 " not supposed to be running because no wait for"
8255 " sync mode was requested.")
8258 class LUQueryInstanceData(NoHooksLU):
8259 """Query runtime instance data.
8263 ("instances", _TListOf(_TNonEmptyString)),
8268 def ExpandNames(self):
8269 self.needed_locks = {}
8270 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8272 if self.op.instances:
8273 self.wanted_names = []
8274 for name in self.op.instances:
8275 full_name = _ExpandInstanceName(self.cfg, name)
8276 self.wanted_names.append(full_name)
8277 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8279 self.wanted_names = None
8280 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8282 self.needed_locks[locking.LEVEL_NODE] = []
8283 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8285 def DeclareLocks(self, level):
8286 if level == locking.LEVEL_NODE:
8287 self._LockInstancesNodes()
8289 def CheckPrereq(self):
8290 """Check prerequisites.
8292 This only checks the optional instance list against the existing names.
8295 if self.wanted_names is None:
8296 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8298 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8299 in self.wanted_names]
8301 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8302 """Returns the status of a block device
8305 if self.op.static or not node:
8308 self.cfg.SetDiskID(dev, node)
8310 result = self.rpc.call_blockdev_find(node, dev)
8314 result.Raise("Can't compute disk status for %s" % instance_name)
8316 status = result.payload
8320 return (status.dev_path, status.major, status.minor,
8321 status.sync_percent, status.estimated_time,
8322 status.is_degraded, status.ldisk_status)
8324 def _ComputeDiskStatus(self, instance, snode, dev):
8325 """Compute block device status.
8328 if dev.dev_type in constants.LDS_DRBD:
8329 # we change the snode then (otherwise we use the one passed in)
8330 if dev.logical_id[0] == instance.primary_node:
8331 snode = dev.logical_id[1]
8333 snode = dev.logical_id[0]
8335 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8337 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8340 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8341 for child in dev.children]
8346 "iv_name": dev.iv_name,
8347 "dev_type": dev.dev_type,
8348 "logical_id": dev.logical_id,
8349 "physical_id": dev.physical_id,
8350 "pstatus": dev_pstatus,
8351 "sstatus": dev_sstatus,
8352 "children": dev_children,
8359 def Exec(self, feedback_fn):
8360 """Gather and return data"""
8363 cluster = self.cfg.GetClusterInfo()
8365 for instance in self.wanted_instances:
8366 if not self.op.static:
8367 remote_info = self.rpc.call_instance_info(instance.primary_node,
8369 instance.hypervisor)
8370 remote_info.Raise("Error checking node %s" % instance.primary_node)
8371 remote_info = remote_info.payload
8372 if remote_info and "state" in remote_info:
8375 remote_state = "down"
8378 if instance.admin_up:
8381 config_state = "down"
8383 disks = [self._ComputeDiskStatus(instance, None, device)
8384 for device in instance.disks]
8387 "name": instance.name,
8388 "config_state": config_state,
8389 "run_state": remote_state,
8390 "pnode": instance.primary_node,
8391 "snodes": instance.secondary_nodes,
8393 # this happens to be the same format used for hooks
8394 "nics": _NICListToTuple(self, instance.nics),
8395 "disk_template": instance.disk_template,
8397 "hypervisor": instance.hypervisor,
8398 "network_port": instance.network_port,
8399 "hv_instance": instance.hvparams,
8400 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8401 "be_instance": instance.beparams,
8402 "be_actual": cluster.FillBE(instance),
8403 "os_instance": instance.osparams,
8404 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8405 "serial_no": instance.serial_no,
8406 "mtime": instance.mtime,
8407 "ctime": instance.ctime,
8408 "uuid": instance.uuid,
8411 result[instance.name] = idict
8416 class LUSetInstanceParams(LogicalUnit):
8417 """Modifies an instances's parameters.
8420 HPATH = "instance-modify"
8421 HTYPE = constants.HTYPE_INSTANCE
8422 _OP_REQP = [("instance_name", _TNonEmptyString)]
8424 ("nics", _EmptyList),
8425 ("disks", _EmptyList),
8426 ("beparams", _EmptyDict),
8427 ("hvparams", _EmptyDict),
8428 ("disk_template", None),
8429 ("remote_node", None),
8431 ("force_variant", False),
8437 def CheckArguments(self):
8438 if not (self.op.nics or self.op.disks or self.op.disk_template or
8439 self.op.hvparams or self.op.beparams or self.op.os_name):
8440 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8442 if self.op.hvparams:
8443 _CheckGlobalHvParams(self.op.hvparams)
8447 for disk_op, disk_dict in self.op.disks:
8448 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8449 if disk_op == constants.DDM_REMOVE:
8452 elif disk_op == constants.DDM_ADD:
8455 if not isinstance(disk_op, int):
8456 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8457 if not isinstance(disk_dict, dict):
8458 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8459 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8461 if disk_op == constants.DDM_ADD:
8462 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8463 if mode not in constants.DISK_ACCESS_SET:
8464 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8466 size = disk_dict.get('size', None)
8468 raise errors.OpPrereqError("Required disk parameter size missing",
8472 except (TypeError, ValueError), err:
8473 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8474 str(err), errors.ECODE_INVAL)
8475 disk_dict['size'] = size
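# illustrative: a well-formed "add" entry thus looks like
# (constants.DDM_ADD, {"size": 1024, "mode": constants.DISK_RDWR})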
8477 # modification of disk
8478 if 'size' in disk_dict:
8479 raise errors.OpPrereqError("Disk size change not possible, use"
8480 " grow-disk", errors.ECODE_INVAL)
8482 if disk_addremove > 1:
8483 raise errors.OpPrereqError("Only one disk add or remove operation"
8484 " supported at a time", errors.ECODE_INVAL)
8486 if self.op.disks and self.op.disk_template is not None:
8487 raise errors.OpPrereqError("Disk template conversion and other disk"
8488 " changes not supported at the same time",
8491 if self.op.disk_template:
8492 _CheckDiskTemplate(self.op.disk_template)
8493 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8494 self.op.remote_node is None):
8495 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8496 " one requires specifying a secondary node",
8501 for nic_op, nic_dict in self.op.nics:
8502 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8503 if nic_op == constants.DDM_REMOVE:
8506 elif nic_op == constants.DDM_ADD:
8509 if not isinstance(nic_op, int):
8510 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8511 if not isinstance(nic_dict, dict):
8512 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8513 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8515 # nic_dict should be a dict
8516 nic_ip = nic_dict.get('ip', None)
8517 if nic_ip is not None:
8518 if nic_ip.lower() == constants.VALUE_NONE:
8519 nic_dict['ip'] = None
8521 if not utils.IsValidIP4(nic_ip):
8522 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8525 nic_bridge = nic_dict.get('bridge', None)
8526 nic_link = nic_dict.get('link', None)
8527 if nic_bridge and nic_link:
8528 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8529 " at the same time", errors.ECODE_INVAL)
8530 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8531 nic_dict['bridge'] = None
8532 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8533 nic_dict['link'] = None
8535 if nic_op == constants.DDM_ADD:
8536 nic_mac = nic_dict.get('mac', None)
8538 nic_dict['mac'] = constants.VALUE_AUTO
8540 if 'mac' in nic_dict:
8541 nic_mac = nic_dict['mac']
8542 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8543 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8545 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8546 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8547 " modifying an existing nic",
8550 if nic_addremove > 1:
8551 raise errors.OpPrereqError("Only one NIC add or remove operation"
8552 " supported at a time", errors.ECODE_INVAL)
8554 def ExpandNames(self):
8555 self._ExpandAndLockInstance()
8556 self.needed_locks[locking.LEVEL_NODE] = []
8557 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8559 def DeclareLocks(self, level):
8560 if level == locking.LEVEL_NODE:
8561 self._LockInstancesNodes()
8562 if self.op.disk_template and self.op.remote_node:
8563 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8564 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8566 def BuildHooksEnv(self):
8569 This runs on the master, primary and secondaries.
8573 if constants.BE_MEMORY in self.be_new:
8574 args['memory'] = self.be_new[constants.BE_MEMORY]
8575 if constants.BE_VCPUS in self.be_new:
8576 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8577 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8578 # information at all.
8581 nic_override = dict(self.op.nics)
8582 for idx, nic in enumerate(self.instance.nics):
8583 if idx in nic_override:
8584 this_nic_override = nic_override[idx]
8586 this_nic_override = {}
8587 if 'ip' in this_nic_override:
8588 ip = this_nic_override['ip']
8591 if 'mac' in this_nic_override:
8592 mac = this_nic_override['mac']
8595 if idx in self.nic_pnew:
8596 nicparams = self.nic_pnew[idx]
8598 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8599 mode = nicparams[constants.NIC_MODE]
8600 link = nicparams[constants.NIC_LINK]
8601 args['nics'].append((ip, mac, mode, link))
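# each entry appended to args['nics'] is an (ip, mac, mode, link) tuple,
# matching the format expected by the instance hooks environment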
8602 if constants.DDM_ADD in nic_override:
8603 ip = nic_override[constants.DDM_ADD].get('ip', None)
8604 mac = nic_override[constants.DDM_ADD]['mac']
8605 nicparams = self.nic_pnew[constants.DDM_ADD]
8606 mode = nicparams[constants.NIC_MODE]
8607 link = nicparams[constants.NIC_LINK]
8608 args['nics'].append((ip, mac, mode, link))
8609 elif constants.DDM_REMOVE in nic_override:
8610 del args['nics'][-1]
8612 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8613 if self.op.disk_template:
8614 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8615 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8618 def CheckPrereq(self):
8619 """Check prerequisites.
8621 This only checks the instance list against the existing names.
8624 # checking the new params on the primary/secondary nodes
8626 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8627 cluster = self.cluster = self.cfg.GetClusterInfo()
8628 assert self.instance is not None, \
8629 "Cannot retrieve locked instance %s" % self.op.instance_name
8630 pnode = instance.primary_node
8631 nodelist = list(instance.all_nodes)
8634 if self.op.os_name and not self.op.force:
8635 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8636 self.op.force_variant)
8637 instance_os = self.op.os_name
8639 instance_os = instance.os
8641 if self.op.disk_template:
8642 if instance.disk_template == self.op.disk_template:
8643 raise errors.OpPrereqError("Instance already has disk template %s" %
8644 instance.disk_template, errors.ECODE_INVAL)
8646 if (instance.disk_template,
8647 self.op.disk_template) not in self._DISK_CONVERSIONS:
8648 raise errors.OpPrereqError("Unsupported disk template conversion from"
8649 " %s to %s" % (instance.disk_template,
8650 self.op.disk_template),
8652 _CheckInstanceDown(self, instance, "cannot change disk template")
8653 if self.op.disk_template in constants.DTS_NET_MIRROR:
8654 _CheckNodeOnline(self, self.op.remote_node)
8655 _CheckNodeNotDrained(self, self.op.remote_node)
8656 disks = [{"size": d.size} for d in instance.disks]
8657 required = _ComputeDiskSize(self.op.disk_template, disks)
8658 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8660 # hvparams processing
8661 if self.op.hvparams:
8662 hv_type = instance.hypervisor
8663 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8664 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8665 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8668 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8669 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8670 self.hv_new = hv_new # the new actual values
8671 self.hv_inst = i_hvdict # the new dict (without defaults)
8673 self.hv_new = self.hv_inst = {}
8675 # beparams processing
8676 if self.op.beparams:
8677 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8679 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8680 be_new = cluster.SimpleFillBE(i_bedict)
8681 self.be_new = be_new # the new actual values
8682 self.be_inst = i_bedict # the new dict (without defaults)
8684 self.be_new = self.be_inst = {}
8686 # osparams processing
8687 if self.op.osparams:
8688 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8689 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8690 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8691 self.os_inst = i_osdict # the new dict (without defaults)
8693 self.os_new = self.os_inst = {}
8697 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8698 mem_check_list = [pnode]
8699 if be_new[constants.BE_AUTO_BALANCE]:
8700 # either we changed auto_balance to yes or it was from before
8701 mem_check_list.extend(instance.secondary_nodes)
8702 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8703 instance.hypervisor)
8704 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8705 instance.hypervisor)
8706 pninfo = nodeinfo[pnode]
8707 msg = pninfo.fail_msg
8709 # Assume the primary node is unreachable and go ahead
8710 self.warn.append("Can't get info from primary node %s: %s" %
8712 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8713 self.warn.append("Node data from primary node %s doesn't contain"
8714 " free memory information" % pnode)
8715 elif instance_info.fail_msg:
8716 self.warn.append("Can't get instance runtime information: %s" %
8717 instance_info.fail_msg)
8719 if instance_info.payload:
8720 current_mem = int(instance_info.payload['memory'])
8722 # Assume instance not running
8723 # (there is a slight race condition here, but it's not very probable,
8724 # and we have no other way to check)
8726 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8727 pninfo.payload['memory_free'])
8729 raise errors.OpPrereqError("This change will prevent the instance"
8730 " from starting, due to %d MB of memory"
8731 " missing on its primary node" % miss_mem,
8734 if be_new[constants.BE_AUTO_BALANCE]:
8735 for node, nres in nodeinfo.items():
8736 if node not in instance.secondary_nodes:
8740 self.warn.append("Can't get info from secondary node %s: %s" %
8742 elif not isinstance(nres.payload.get('memory_free', None), int):
8743 self.warn.append("Secondary node %s didn't return free"
8744 " memory information" % node)
8745 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8746 self.warn.append("Not enough memory to failover instance to"
8747 " secondary node %s" % node)
8752 for nic_op, nic_dict in self.op.nics:
8753 if nic_op == constants.DDM_REMOVE:
8754 if not instance.nics:
8755 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8758 if nic_op != constants.DDM_ADD:
8760 if not instance.nics:
8761 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8762 " no NICs" % nic_op,
8764 if nic_op < 0 or nic_op >= len(instance.nics):
8765 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8767 (nic_op, len(instance.nics) - 1),
8769 old_nic_params = instance.nics[nic_op].nicparams
8770 old_nic_ip = instance.nics[nic_op].ip
8775 update_params_dict = dict([(key, nic_dict[key])
8776 for key in constants.NICS_PARAMETERS
8777 if key in nic_dict])
8779 if 'bridge' in nic_dict:
8780 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8782 new_nic_params = _GetUpdatedParams(old_nic_params,
8784 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8785 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8786 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8787 self.nic_pinst[nic_op] = new_nic_params
8788 self.nic_pnew[nic_op] = new_filled_nic_params
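# nic_pinst keeps only the instance-level values, while nic_pnew is the
# same dict filled out with cluster defaults; the filled version is what
# gets validated below and reported in the hooks environment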
8789 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8791 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8792 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8793 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8795 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8797 self.warn.append(msg)
8799 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8800 if new_nic_mode == constants.NIC_MODE_ROUTED:
8801 if 'ip' in nic_dict:
8802 nic_ip = nic_dict['ip']
8806 raise errors.OpPrereqError('Cannot set the nic ip to None'
8807 ' on a routed nic', errors.ECODE_INVAL)
8808 if 'mac' in nic_dict:
8809 nic_mac = nic_dict['mac']
8811 raise errors.OpPrereqError('Cannot set the nic mac to None',
8813 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8814 # otherwise generate the mac
8815 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8817 # or validate/reserve the current one
8819 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8820 except errors.ReservationError:
8821 raise errors.OpPrereqError("MAC address %s already in use"
8822 " in cluster" % nic_mac,
8823 errors.ECODE_NOTUNIQUE)
8826 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8827 raise errors.OpPrereqError("Disk operations not supported for"
8828 " diskless instances",
8830 for disk_op, _ in self.op.disks:
8831 if disk_op == constants.DDM_REMOVE:
8832 if len(instance.disks) == 1:
8833 raise errors.OpPrereqError("Cannot remove the last disk of"
8834 " an instance", errors.ECODE_INVAL)
8835 _CheckInstanceDown(self, instance, "cannot remove disks")
8837 if (disk_op == constants.DDM_ADD and
8838 len(instance.disks) >= constants.MAX_DISKS):
8839 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8840 " add more" % constants.MAX_DISKS,
8842 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8844 if disk_op < 0 or disk_op >= len(instance.disks):
8845 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8847 (disk_op, len(instance.disks)),
8852 def _ConvertPlainToDrbd(self, feedback_fn):
8853 """Converts an instance from plain to drbd.
8856 feedback_fn("Converting template to drbd")
8857 instance = self.instance
8858 pnode = instance.primary_node
8859 snode = self.op.remote_node
8861 # create a fake disk info for _GenerateDiskTemplate
8862 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8863 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8864 instance.name, pnode, [snode],
8865 disk_info, None, None, 0)
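# each generated disk is a DRBD8 device with two LV children:
# children[0] is the data slot (the existing LV is renamed onto it below)
# and children[1] a brand new metadata LV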
8866 info = _GetInstanceInfoText(instance)
8867 feedback_fn("Creating aditional volumes...")
8868 # first, create the missing data and meta devices
8869 for disk in new_disks:
8870 # unfortunately this is... not too nice
8871 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8873 for child in disk.children:
8874 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8875 # at this stage, all new LVs have been created, we can rename the old ones
8877 feedback_fn("Renaming original volumes...")
8878 rename_list = [(o, n.children[0].logical_id)
8879 for (o, n) in zip(instance.disks, new_disks)]
8880 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8881 result.Raise("Failed to rename original LVs")
8883 feedback_fn("Initializing DRBD devices...")
8884 # all child devices are in place, we can now create the DRBD devices
8885 for disk in new_disks:
8886 for node in [pnode, snode]:
8887 f_create = node == pnode
8888 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8890 # at this point, the instance has been modified
8891 instance.disk_template = constants.DT_DRBD8
8892 instance.disks = new_disks
8893 self.cfg.Update(instance, feedback_fn)
8895 # disks are created, waiting for sync
8896 disk_abort = not _WaitForSync(self, instance)
8898 raise errors.OpExecError("There are some degraded disks for"
8899 " this instance, please cleanup manually")
8901 def _ConvertDrbdToPlain(self, feedback_fn):
8902 """Converts an instance from drbd to plain.
8905 instance = self.instance
8906 assert len(instance.secondary_nodes) == 1
8907 pnode = instance.primary_node
8908 snode = instance.secondary_nodes[0]
8909 feedback_fn("Converting template to plain")
8911 old_disks = instance.disks
8912 new_disks = [d.children[0] for d in old_disks]
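# children[0] of each DRBD disk is its data LV on the primary node; it
# simply becomes the plain disk, while the secondary's volumes and the
# primary's metadata LVs are removed below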
8914 # copy over size and mode
8915 for parent, child in zip(old_disks, new_disks):
8916 child.size = parent.size
8917 child.mode = parent.mode
8919 # update instance structure
8920 instance.disks = new_disks
8921 instance.disk_template = constants.DT_PLAIN
8922 self.cfg.Update(instance, feedback_fn)
8924 feedback_fn("Removing volumes on the secondary node...")
8925 for disk in old_disks:
8926 self.cfg.SetDiskID(disk, snode)
8927 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8929 self.LogWarning("Could not remove block device %s on node %s,"
8930 " continuing anyway: %s", disk.iv_name, snode, msg)
8932 feedback_fn("Removing unneeded volumes on the primary node...")
8933 for idx, disk in enumerate(old_disks):
8934 meta = disk.children[1]
8935 self.cfg.SetDiskID(meta, pnode)
8936 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8938 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8939 " continuing anyway: %s", idx, pnode, msg)
8942 def Exec(self, feedback_fn):
8943 """Modifies an instance.
8945 All parameters take effect only at the next restart of the instance.
8948 # Process here the warnings from CheckPrereq, as we don't have a
8949 # feedback_fn there.
8950 for warn in self.warn:
8951 feedback_fn("WARNING: %s" % warn)
8954 instance = self.instance
8956 for disk_op, disk_dict in self.op.disks:
8957 if disk_op == constants.DDM_REMOVE:
8958 # remove the last disk
8959 device = instance.disks.pop()
8960 device_idx = len(instance.disks)
8961 for node, disk in device.ComputeNodeTree(instance.primary_node):
8962 self.cfg.SetDiskID(disk, node)
8963 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8965 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8966 " continuing anyway", device_idx, node, msg)
8967 result.append(("disk/%d" % device_idx, "remove"))
8968 elif disk_op == constants.DDM_ADD:
8970 if instance.disk_template == constants.DT_FILE:
8971 file_driver, file_path = instance.disks[0].logical_id
8972 file_path = os.path.dirname(file_path)
8974 file_driver = file_path = None
8975 disk_idx_base = len(instance.disks)
8976 new_disk = _GenerateDiskTemplate(self,
8977 instance.disk_template,
8978 instance.name, instance.primary_node,
8979 instance.secondary_nodes,
8984 instance.disks.append(new_disk)
8985 info = _GetInstanceInfoText(instance)
8987 logging.info("Creating volume %s for instance %s",
8988 new_disk.iv_name, instance.name)
8989 # Note: this needs to be kept in sync with _CreateDisks
8991 for node in instance.all_nodes:
8992 f_create = node == instance.primary_node
8994 _CreateBlockDev(self, node, instance, new_disk,
8995 f_create, info, f_create)
8996 except errors.OpExecError, err:
8997 self.LogWarning("Failed to create volume %s (%s) on"
8999 new_disk.iv_name, new_disk, node, err)
9000 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9001 (new_disk.size, new_disk.mode)))
9003 # change a given disk
9004 instance.disks[disk_op].mode = disk_dict['mode']
9005 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9007 if self.op.disk_template:
9008 r_shut = _ShutdownInstanceDisks(self, instance)
9010 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9011 " proceed with disk template conversion")
9012 mode = (instance.disk_template, self.op.disk_template)
9014 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9016 self.cfg.ReleaseDRBDMinors(instance.name)
9018 result.append(("disk_template", self.op.disk_template))
9021 for nic_op, nic_dict in self.op.nics:
9022 if nic_op == constants.DDM_REMOVE:
9023 # remove the last nic
9024 del instance.nics[-1]
9025 result.append(("nic.%d" % len(instance.nics), "remove"))
9026 elif nic_op == constants.DDM_ADD:
9027 # mac and bridge should be set by now
9028 mac = nic_dict['mac']
9029 ip = nic_dict.get('ip', None)
9030 nicparams = self.nic_pinst[constants.DDM_ADD]
9031 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9032 instance.nics.append(new_nic)
9033 result.append(("nic.%d" % (len(instance.nics) - 1),
9034 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9035 (new_nic.mac, new_nic.ip,
9036 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9037 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9040 for key in 'mac', 'ip':
9042 setattr(instance.nics[nic_op], key, nic_dict[key])
9043 if nic_op in self.nic_pinst:
9044 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9045 for key, val in nic_dict.iteritems():
9046 result.append(("nic.%s/%d" % (key, nic_op), val))
9049 if self.op.hvparams:
9050 instance.hvparams = self.hv_inst
9051 for key, val in self.op.hvparams.iteritems():
9052 result.append(("hv/%s" % key, val))
9055 if self.op.beparams:
9056 instance.beparams = self.be_inst
9057 for key, val in self.op.beparams.iteritems():
9058 result.append(("be/%s" % key, val))
9062 instance.os = self.op.os_name
9065 if self.op.osparams:
9066 instance.osparams = self.os_inst
9067 for key, val in self.op.osparams.iteritems():
9068 result.append(("os/%s" % key, val))
9070 self.cfg.Update(instance, feedback_fn)
9074 _DISK_CONVERSIONS = {
9075 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9076 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9080 class LUQueryExports(NoHooksLU):
9081 """Query the exports list
9084 _OP_REQP = [("nodes", _TListOf(_TNonEmptyString))]
9087 def ExpandNames(self):
9088 self.needed_locks = {}
9089 self.share_locks[locking.LEVEL_NODE] = 1
9090 if not self.op.nodes:
9091 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9093 self.needed_locks[locking.LEVEL_NODE] = \
9094 _GetWantedNodes(self, self.op.nodes)
9096 def Exec(self, feedback_fn):
9097 """Compute the list of all the exported system images.
9100 @return: a dictionary with the structure node->(export-list)
9101 where export-list is a list of the instances exported on that node
9105 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9106 rpcresult = self.rpc.call_export_list(self.nodes)
9108 for node in rpcresult:
9109 if rpcresult[node].fail_msg:
9110 result[node] = False
9112 result[node] = rpcresult[node].payload
9117 class LUPrepareExport(NoHooksLU):
9118 """Prepares an instance for an export and returns useful information.
9122 ("instance_name", _TNonEmptyString),
9123 ("mode", _TElemOf(constants.EXPORT_MODES)),
9127 def ExpandNames(self):
9128 self._ExpandAndLockInstance()
9130 def CheckPrereq(self):
9131 """Check prerequisites.
9134 instance_name = self.op.instance_name
9136 self.instance = self.cfg.GetInstanceInfo(instance_name)
9137 assert self.instance is not None, \
9138 "Cannot retrieve locked instance %s" % self.op.instance_name
9139 _CheckNodeOnline(self, self.instance.primary_node)
9141 self._cds = _GetClusterDomainSecret()
9143 def Exec(self, feedback_fn):
9144 """Prepares an instance for an export.
9147 instance = self.instance
9149 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9150 salt = utils.GenerateSecret(8)
9152 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9153 result = self.rpc.call_x509_cert_create(instance.primary_node,
9154 constants.RIE_CERT_VALIDITY)
9155 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9157 (name, cert_pem) = result.payload
9159 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9163 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9164 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9166 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9172 class LUExportInstance(LogicalUnit):
9173 """Export an instance to an image in the cluster.
9176 HPATH = "instance-export"
9177 HTYPE = constants.HTYPE_INSTANCE
9179 ("instance_name", _TNonEmptyString),
9180 ("target_node", _TNonEmptyString),
9181 ("shutdown", _TBool),
9182 ("mode", _TElemOf(constants.EXPORT_MODES)),
9185 ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT),
9186 ("remove_instance", False),
9187 ("ignore_remove_failures", False),
9188 ("mode", constants.EXPORT_MODE_LOCAL),
9189 ("x509_key_name", None),
9190 ("destination_x509_ca", None),
9194 def CheckArguments(self):
9195 """Check the arguments.
9198 self.x509_key_name = self.op.x509_key_name
9199 self.dest_x509_ca_pem = self.op.destination_x509_ca
9201 if self.op.remove_instance and not self.op.shutdown:
9202 raise errors.OpPrereqError("Can not remove instance without shutting it"
9205 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9206 if not self.x509_key_name:
9207 raise errors.OpPrereqError("Missing X509 key name for encryption",
9210 if not self.dest_x509_ca_pem:
9211 raise errors.OpPrereqError("Missing destination X509 CA",
9214 def ExpandNames(self):
9215 self._ExpandAndLockInstance()
9217 # Lock all nodes for local exports
9218 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9219 # FIXME: lock only instance primary and destination node
9221 # Sad but true, for now we have to lock all nodes, as we don't know where
9222 # the previous export might be, and in this LU we search for it and
9223 # remove it from its current node. In the future we could fix this by:
9224 # - making a tasklet to search (share-lock all), then create the
9225 # new one, then one to remove, after
9226 # - removing the removal operation altogether
9227 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9229 def DeclareLocks(self, level):
9230 """Last minute lock declaration."""
9231 # All nodes are locked anyway, so nothing to do here.
9233 def BuildHooksEnv(self):
9236 This will run on the master, primary node and target node.
9240 "EXPORT_MODE": self.op.mode,
9241 "EXPORT_NODE": self.op.target_node,
9242 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9243 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9244 # TODO: Generic function for boolean env variables
9245 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9248 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9250 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9252 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9253 nl.append(self.op.target_node)
9257 def CheckPrereq(self):
9258 """Check prerequisites.
9260 This checks that the instance and node names are valid.
9263 instance_name = self.op.instance_name
9265 self.instance = self.cfg.GetInstanceInfo(instance_name)
9266 assert self.instance is not None, \
9267 "Cannot retrieve locked instance %s" % self.op.instance_name
9268 _CheckNodeOnline(self, self.instance.primary_node)
9270 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9271 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9272 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9273 assert self.dst_node is not None
9275 _CheckNodeOnline(self, self.dst_node.name)
9276 _CheckNodeNotDrained(self, self.dst_node.name)
9279 self.dest_disk_info = None
9280 self.dest_x509_ca = None
9282 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9283 self.dst_node = None
9285 if len(self.op.target_node) != len(self.instance.disks):
9286 raise errors.OpPrereqError(("Received destination information for %s"
9287 " disks, but instance %s has %s disks") %
9288 (len(self.op.target_node), instance_name,
9289 len(self.instance.disks)),
9292 cds = _GetClusterDomainSecret()
9294 # Check X509 key name
9296 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9297 except (TypeError, ValueError), err:
9298 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9300 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9301 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9304 # Load and verify CA
9306 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9307 except OpenSSL.crypto.Error, err:
9308 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9309 (err, ), errors.ECODE_INVAL)
9311 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9312 if errcode is not None:
9313 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9314 (msg, ), errors.ECODE_INVAL)
9316 self.dest_x509_ca = cert
9318 # Verify target information
9320 for idx, disk_data in enumerate(self.op.target_node):
9322 (host, port, magic) = \
9323 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9324 except errors.GenericError, err:
9325 raise errors.OpPrereqError("Target info for disk %s: %s" %
9326 (idx, err), errors.ECODE_INVAL)
9328 disk_info.append((host, port, magic))
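# each remote-export target is therefore a (host, port, magic) triple,
# verified above against the cluster domain secret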
9330 assert len(disk_info) == len(self.op.target_node)
9331 self.dest_disk_info = disk_info
9334 raise errors.ProgrammerError("Unhandled export mode %r" %
9337 # instance disk type verification
9338 # TODO: Implement export support for file-based disks
9339 for disk in self.instance.disks:
9340 if disk.dev_type == constants.LD_FILE:
9341 raise errors.OpPrereqError("Export not supported for instances with"
9342 " file-based disks", errors.ECODE_INVAL)
9344 def _CleanupExports(self, feedback_fn):
9345 """Removes exports of current instance from all other nodes.
9347 If an instance in a cluster with nodes A..D was exported to node C, its
9348 exports will be removed from the nodes A, B and D.
9351 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9353 nodelist = self.cfg.GetNodeList()
9354 nodelist.remove(self.dst_node.name)
9356 # on one-node clusters nodelist will be empty after the removal
9357 # if we proceed the backup would be removed because OpQueryExports
9358 # substitutes an empty list with the full cluster node list.
9359 iname = self.instance.name
9361 feedback_fn("Removing old exports for instance %s" % iname)
9362 exportlist = self.rpc.call_export_list(nodelist)
9363 for node in exportlist:
9364 if exportlist[node].fail_msg:
9366 if iname in exportlist[node].payload:
9367 msg = self.rpc.call_export_remove(node, iname).fail_msg
9369 self.LogWarning("Could not remove older export for instance %s"
9370 " on node %s: %s", iname, node, msg)
9372 def Exec(self, feedback_fn):
9373 """Export an instance to an image in the cluster.
9376 assert self.op.mode in constants.EXPORT_MODES
9378 instance = self.instance
9379 src_node = instance.primary_node
9381 if self.op.shutdown:
9382 # shutdown the instance, but not the disks
9383 feedback_fn("Shutting down instance %s" % instance.name)
9384 result = self.rpc.call_instance_shutdown(src_node, instance,
9385 self.op.shutdown_timeout)
9386 # TODO: Maybe ignore failures if ignore_remove_failures is set
9387 result.Raise("Could not shutdown instance %s on"
9388 " node %s" % (instance.name, src_node))
9390 # set the disks ID correctly since call_instance_start needs the
9391 # correct drbd minor to create the symlinks
9392 for disk in instance.disks:
9393 self.cfg.SetDiskID(disk, src_node)
9395 activate_disks = (not instance.admin_up)
9398 # Activate the instance disks if we're exporting a stopped instance
9399 feedback_fn("Activating disks for %s" % instance.name)
9400 _StartInstanceDisks(self, instance, None)
9403 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9406 helper.CreateSnapshots()
9408 if (self.op.shutdown and instance.admin_up and
9409 not self.op.remove_instance):
9410 assert not activate_disks
9411 feedback_fn("Starting instance %s" % instance.name)
9412 result = self.rpc.call_instance_start(src_node, instance, None, None)
9413 msg = result.fail_msg
9415 feedback_fn("Failed to start instance: %s" % msg)
9416 _ShutdownInstanceDisks(self, instance)
9417 raise errors.OpExecError("Could not start instance: %s" % msg)
9419 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9420 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9421 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9422 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9423 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9425 (key_name, _, _) = self.x509_key_name
9428 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9431 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9432 key_name, dest_ca_pem,
9437 # Check for backwards compatibility
9438 assert len(dresults) == len(instance.disks)
9439 assert compat.all(isinstance(i, bool) for i in dresults), \
9440 "Not all results are boolean: %r" % dresults
9444 feedback_fn("Deactivating disks for %s" % instance.name)
9445 _ShutdownInstanceDisks(self, instance)
9447 # Remove instance if requested
9448 if self.op.remove_instance:
9449 if not (compat.all(dresults) and fin_resu):
9450 feedback_fn("Not removing instance %s as parts of the export failed" %
9453 feedback_fn("Removing instance %s" % instance.name)
9454 _RemoveInstance(self, feedback_fn, instance,
9455 self.op.ignore_remove_failures)
9457 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9458 self._CleanupExports(feedback_fn)
9460 return fin_resu, dresults
9463 class LURemoveExport(NoHooksLU):
9464 """Remove exports related to the named instance.
9467 _OP_REQP = [("instance_name", _TNonEmptyString)]
9470 def ExpandNames(self):
9471 self.needed_locks = {}
9472 # We need all nodes to be locked in order for RemoveExport to work, but we
9473 # don't need to lock the instance itself, as nothing will happen to it (and
9474 # we can remove exports also for a removed instance)
9475 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9477 def Exec(self, feedback_fn):
9478 """Remove any export.
9481 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9482 # If the instance was not found we'll try with the name that was passed in.
9483 # This will only work if it was an FQDN, though.
9484     fqdn_warn = False
9485     if not instance_name:
9486       fqdn_warn = True
9487       instance_name = self.op.instance_name
9489     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9490     exportlist = self.rpc.call_export_list(locked_nodes)
9491     found = False
9492     for node in exportlist:
9493       msg = exportlist[node].fail_msg
9494       if msg:
9495         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9496         continue
9497       if instance_name in exportlist[node].payload:
9498         found = True
9499         result = self.rpc.call_export_remove(node, instance_name)
9500         msg = result.fail_msg
9501         if msg:
9502           logging.error("Could not remove export for instance %s"
9503                         " on node %s: %s", instance_name, node, msg)
9505     if fqdn_warn and not found:
9506       feedback_fn("Export not found. If trying to remove an export belonging"
9507                   " to a deleted instance please use its Fully Qualified"
9508                   " Domain Name.")
9511 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9512   """Generic tags LU.
9514   This is an abstract class which is the parent of all the other tags LUs.
9516   """
9518 def ExpandNames(self):
9519 self.needed_locks = {}
9520 if self.op.kind == constants.TAG_NODE:
9521 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9522 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9523 elif self.op.kind == constants.TAG_INSTANCE:
9524 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9525 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9527 def CheckPrereq(self):
9528 """Check prerequisites.
9531 if self.op.kind == constants.TAG_CLUSTER:
9532 self.target = self.cfg.GetClusterInfo()
9533 elif self.op.kind == constants.TAG_NODE:
9534 self.target = self.cfg.GetNodeInfo(self.op.name)
9535 elif self.op.kind == constants.TAG_INSTANCE:
9536       self.target = self.cfg.GetInstanceInfo(self.op.name)
9537     else:
9538       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9539 str(self.op.kind), errors.ECODE_INVAL)
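# Illustrative sketch (hypothetical values): a tags opcode handled by these LUs
# carries a "kind" from constants.VALID_TAG_TYPES plus the object name, e.g.
#
#   kind = constants.TAG_INSTANCE
#   name = "instance1.example.com"
#   tags = ["env:prod", "owner:ops"]
#
# ExpandNames() above resolves the name and takes the matching node or instance
# lock, and CheckPrereq() sets self.target to the corresponding config object.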
9542 class LUGetTags(TagsLU):
9543 """Returns the tags of a given object.
9546   _OP_REQP = [
9547     ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9548     ("name", _TNonEmptyString),
9549     ]
9552 def Exec(self, feedback_fn):
9553 """Returns the tag list.
9556 return list(self.target.GetTags())
9559 class LUSearchTags(NoHooksLU):
9560 """Searches the tags for a given pattern.
9563 _OP_REQP = [("pattern", _TNonEmptyString)]
9566 def ExpandNames(self):
9567 self.needed_locks = {}
9569 def CheckPrereq(self):
9570 """Check prerequisites.
9572     This checks the pattern passed for validity by compiling it.
9574     """
9575     try:
9576       self.re = re.compile(self.op.pattern)
9577 except re.error, err:
9578 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9579 (self.op.pattern, err), errors.ECODE_INVAL)
9581 def Exec(self, feedback_fn):
9582     """Returns the tag list.
9584     """
9585     cfg = self.cfg
9586     tgts = [("/cluster", cfg.GetClusterInfo())]
9587     ilist = cfg.GetAllInstancesInfo().values()
9588     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9589     nlist = cfg.GetAllNodesInfo().values()
9590     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9591     results = []
9592     for path, target in tgts:
9593       for tag in target.GetTags():
9594         if self.re.search(tag):
9595           results.append((path, tag))
9597     return results
9599 class LUAddTags(TagsLU):
9600 """Sets a tag on a given object.
9603   _OP_REQP = [
9604     ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9605     ("name", _TNonEmptyString),
9606     ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
9607     ]
9610 def CheckPrereq(self):
9611 """Check prerequisites.
9613 This checks the type and length of the tag name and value.
9616 TagsLU.CheckPrereq(self)
9617 for tag in self.op.tags:
9618 objects.TaggableObject.ValidateTag(tag)
9620   def Exec(self, feedback_fn):
9621     """Sets the tag.
9623     """
9624     try:
9625       for tag in self.op.tags:
9626         self.target.AddTag(tag)
9627 except errors.TagError, err:
9628 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9629 self.cfg.Update(self.target, feedback_fn)
9632 class LUDelTags(TagsLU):
9633 """Delete a list of tags from a given object.
9636   _OP_REQP = [
9637     ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9638     ("name", _TNonEmptyString),
9639     ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
9640     ]
9643 def CheckPrereq(self):
9644 """Check prerequisites.
9646 This checks that we have the given tag.
9649 TagsLU.CheckPrereq(self)
9650 for tag in self.op.tags:
9651 objects.TaggableObject.ValidateTag(tag)
9652 del_tags = frozenset(self.op.tags)
9653 cur_tags = self.target.GetTags()
9654 if not del_tags <= cur_tags:
9655 diff_tags = del_tags - cur_tags
9656 diff_names = ["'%s'" % tag for tag in diff_tags]
9658 raise errors.OpPrereqError("Tag(s) %s not found" %
9659 (",".join(diff_names)), errors.ECODE_NOENT)
9661 def Exec(self, feedback_fn):
9662 """Remove the tag from the object.
9665 for tag in self.op.tags:
9666 self.target.RemoveTag(tag)
9667 self.cfg.Update(self.target, feedback_fn)
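# Illustrative sketch (hypothetical values): both LUAddTags and LUDelTags run
# every tag through objects.TaggableObject.ValidateTag, which is expected to
# raise errors.TagError for malformed tags, e.g.
#
#   objects.TaggableObject.ValidateTag("owner:ops")  # accepted
#   objects.TaggableObject.ValidateTag("")           # raises errors.TagError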
9670 class LUTestDelay(NoHooksLU):
9671 """Sleep for a specified amount of time.
9673   This LU sleeps on the master and/or nodes for a specified amount of
9674   time.
9676   """
9677   _OP_REQP = [
9678     ("duration", _TFloat),
9679     ("on_master", _TBool),
9680     ("on_nodes", _TListOf(_TNonEmptyString)),
9681     ("repeat", _TPositiveInt)
9682     ]
9688 def ExpandNames(self):
9689 """Expand names and set required locks.
9691 This expands the node list, if any.
9694 self.needed_locks = {}
9695 if self.op.on_nodes:
9696 # _GetWantedNodes can be used here, but is not always appropriate to use
9697       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9698       # more information.
9699 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9700 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9702 def _TestDelay(self):
9703 """Do the actual sleep.
9706 if self.op.on_master:
9707 if not utils.TestDelay(self.op.duration):
9708 raise errors.OpExecError("Error during master delay test")
9709 if self.op.on_nodes:
9710 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9711 for node, node_result in result.items():
9712 node_result.Raise("Failure during rpc call to node %s" % node)
9714 def Exec(self, feedback_fn):
9715 """Execute the test delay opcode, with the wanted repetitions.
9718     if self.op.repeat == 0:
9719       self._TestDelay()
9720     else:
9721       top_value = self.op.repeat - 1
9722       for i in range(self.op.repeat):
9723         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9724         self._TestDelay()
9727 class IAllocator(object):
9728 """IAllocator framework.
9730   An IAllocator instance has the following sets of attributes:
9731     - cfg that is needed to query the cluster
9732     - input data (all members of the _KEYS class attribute are required)
9733     - four buffer attributes (in|out_data|text), that represent the
9734       input (to the external script) in text and data structure format,
9735       and the output from it, again in two formats
9736     - the result variables from the script (success, info, nodes) for
9737       easy usage
9739   """
9740 # pylint: disable-msg=R0902
9741   # lots of instance attributes
9742   _ALLO_KEYS = [
9743     "name", "mem_size", "disks", "disk_template",
9744     "os", "tags", "nics", "vcpus", "hypervisor",
9745     ]
9746   _RELO_KEYS = [
9747     "name", "relocate_from",
9748     ]
9749   _EVAC_KEYS = [
9750     "evac_nodes",
9751     ]
9753   def __init__(self, cfg, rpc, mode, **kwargs):
9754     self.cfg = cfg
9755     self.rpc = rpc
9756     # init buffer variables
9757     self.in_text = self.out_text = self.in_data = self.out_data = None
9758     # init all input fields so that pylint is happy
9759     self.mode = mode
9760     self.mem_size = self.disks = self.disk_template = None
9761     self.os = self.tags = self.nics = self.vcpus = None
9762     self.hypervisor = None
9763     self.relocate_from = None
9764     self.name = None
9765     self.evac_nodes = None
9766     # computed fields
9767     self.required_nodes = None
9768     # init result fields
9769     self.success = self.info = self.result = None
9770 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9771 keyset = self._ALLO_KEYS
9772 fn = self._AddNewInstance
9773 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9774 keyset = self._RELO_KEYS
9775 fn = self._AddRelocateInstance
9776 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9777 keyset = self._EVAC_KEYS
9778 fn = self._AddEvacuateNodes
9779     else:
9780       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9781                                    " IAllocator" % self.mode)
9782     for key in kwargs:
9783       if key not in keyset:
9784         raise errors.ProgrammerError("Invalid input parameter '%s' to"
9785                                      " IAllocator" % key)
9786       setattr(self, key, kwargs[key])
9788     for key in keyset:
9789       if key not in kwargs:
9790         raise errors.ProgrammerError("Missing input parameter '%s' to"
9791                                      " IAllocator" % key)
9792     self._BuildInputData(fn)
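# Illustrative sketch (hypothetical values): callers construct an IAllocator
# with exactly the keyword arguments of the selected mode's keyset, e.g. for
# an allocation request:
#
#   ial = IAllocator(self.cfg, self.rpc,
#                    mode=constants.IALLOCATOR_MODE_ALLOC,
#                    name="instance1.example.com",
#                    mem_size=512,
#                    disks=[{"size": 1024, "mode": "w"}],
#                    disk_template=constants.DT_DRBD8,
#                    os="debian-image", tags=[], nics=[], vcpus=1,
#                    hypervisor=None)
#   ial.Run("hail")  # "hail" being one possible iallocator script name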
9794 def _ComputeClusterData(self):
9795 """Compute the generic allocator input data.
9797     This is the data that is independent of the actual operation.
9799     """
9800     cfg = self.cfg
9801     cluster_info = cfg.GetClusterInfo()
9802     # cluster data
9803     data = {
9804       "version": constants.IALLOCATOR_VERSION,
9805       "cluster_name": cfg.GetClusterName(),
9806       "cluster_tags": list(cluster_info.GetTags()),
9807       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9808       # we don't have job IDs
9809       }
9810     iinfo = cfg.GetAllInstancesInfo().values()
9811     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9813     # node data
9814     node_results = {}
9815     node_list = cfg.GetNodeList()
9817 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9818 hypervisor_name = self.hypervisor
9819 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9820 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9821 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9822 hypervisor_name = cluster_info.enabled_hypervisors[0]
9824     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9825                                         hypervisor_name)
9826     node_iinfo = \
9827       self.rpc.call_all_instances_info(node_list,
9828 cluster_info.enabled_hypervisors)
9829 for nname, nresult in node_data.items():
9830 # first fill in static (config-based) values
9831       ninfo = cfg.GetNodeInfo(nname)
9832       pnr = {
9833         "tags": list(ninfo.GetTags()),
9834         "primary_ip": ninfo.primary_ip,
9835         "secondary_ip": ninfo.secondary_ip,
9836         "offline": ninfo.offline,
9837         "drained": ninfo.drained,
9838         "master_candidate": ninfo.master_candidate,
9839         }
9841       if not (ninfo.offline or ninfo.drained):
9842         nresult.Raise("Can't get data for node %s" % nname)
9843         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9844                                 nname)
9845         remote_info = nresult.payload
9847 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9848 'vg_size', 'vg_free', 'cpu_total']:
9849 if attr not in remote_info:
9850 raise errors.OpExecError("Node '%s' didn't return attribute"
9851 " '%s'" % (nname, attr))
9852 if not isinstance(remote_info[attr], int):
9853           raise errors.OpExecError("Node '%s' returned invalid value"
9854                                    " for '%s': %s" %
9855                                    (nname, attr, remote_info[attr]))
9856 # compute memory used by primary instances
9857 i_p_mem = i_p_up_mem = 0
9858 for iinfo, beinfo in i_list:
9859 if iinfo.primary_node == nname:
9860 i_p_mem += beinfo[constants.BE_MEMORY]
9861             if iinfo.name not in node_iinfo[nname].payload:
9862               i_used_mem = 0
9863             else:
9864               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9865             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9866             remote_info['memory_free'] -= max(0, i_mem_diff)
9868             if iinfo.admin_up:
9869               i_p_up_mem += beinfo[constants.BE_MEMORY]
9871         # compute memory used by instances
9872         pnr_dyn = {
9873           "total_memory": remote_info['memory_total'],
9874           "reserved_memory": remote_info['memory_dom0'],
9875           "free_memory": remote_info['memory_free'],
9876           "total_disk": remote_info['vg_size'],
9877           "free_disk": remote_info['vg_free'],
9878           "total_cpus": remote_info['cpu_total'],
9879           "i_pri_memory": i_p_mem,
9880           "i_pri_up_memory": i_p_up_mem,
9881           }
9882         pnr.update(pnr_dyn)
9884       node_results[nname] = pnr
9885     data["nodes"] = node_results
9886     # instance data
9887     instance_data = {}
9889     for iinfo, beinfo in i_list:
9890       nic_data = []
9891       for nic in iinfo.nics:
9892         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
9893         nic_dict = {"mac": nic.mac,
9894                     "ip": nic.ip,
9895                     "mode": filled_params[constants.NIC_MODE],
9896                     "link": filled_params[constants.NIC_LINK],
9897                    }
9898         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9899           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9900         nic_data.append(nic_dict)
9901       pir = {
9902         "tags": list(iinfo.GetTags()),
9903         "admin_up": iinfo.admin_up,
9904         "vcpus": beinfo[constants.BE_VCPUS],
9905         "memory": beinfo[constants.BE_MEMORY],
9906         "os": iinfo.os,
9907         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9908         "nics": nic_data,
9909         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9910         "disk_template": iinfo.disk_template,
9911         "hypervisor": iinfo.hypervisor,
9912         }
9913       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9914                                                  pir["disks"])
9915       instance_data[iinfo.name] = pir
9917     data["instances"] = instance_data
9919     self.in_data = data
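# Illustrative sketch (hypothetical values) of the structure assembled above:
#
#   self.in_data = {
#     "version": constants.IALLOCATOR_VERSION,
#     "cluster_name": "cluster.example.com",
#     "cluster_tags": [],
#     "enabled_hypervisors": ["xen-pvm"],
#     "nodes": {"node1.example.com": {"total_memory": 4096, ...}},
#     "instances": {"instance1.example.com": {"memory": 512, ...}},
#   }
#
# The mode-specific "request" entry is added later by _BuildInputData().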
9921 def _AddNewInstance(self):
9922 """Add new instance data to allocator structure.
9924 This in combination with _AllocatorGetClusterData will create the
9925 correct structure needed as input for the allocator.
9927 The checks for the completeness of the opcode must have already been
9931 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9933 if self.disk_template in constants.DTS_NET_MIRROR:
9934 self.required_nodes = 2
9936 self.required_nodes = 1
9939 "disk_template": self.disk_template,
9942 "vcpus": self.vcpus,
9943 "memory": self.mem_size,
9944 "disks": self.disks,
9945 "disk_space_total": disk_space,
9947 "required_nodes": self.required_nodes,
9951 def _AddRelocateInstance(self):
9952 """Add relocate instance data to allocator structure.
9954     This in combination with _ComputeClusterData will create the
9955     correct structure needed as input for the allocator.
9957     The checks for the completeness of the opcode must have already been
9958     done.
9960     """
9961     instance = self.cfg.GetInstanceInfo(self.name)
9962     if instance is None:
9963       raise errors.ProgrammerError("Unknown instance '%s' passed to"
9964                                    " IAllocator" % self.name)
9966     if instance.disk_template not in constants.DTS_NET_MIRROR:
9967       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9968                                  errors.ECODE_INVAL)
9970     if len(instance.secondary_nodes) != 1:
9971       raise errors.OpPrereqError("Instance has not exactly one secondary node",
9972                                  errors.ECODE_STATE)
9974     self.required_nodes = 1
9975     disk_sizes = [{'size': disk.size} for disk in instance.disks]
9976     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9978     request = {
9979       "name": self.name,
9980       "disk_space_total": disk_space,
9981       "required_nodes": self.required_nodes,
9982       "relocate_from": self.relocate_from,
9983       }
9984     return request
9986 def _AddEvacuateNodes(self):
9987     """Add evacuate nodes data to allocator structure.
9989     """
9990     request = {
9991       "evac_nodes": self.evac_nodes
9992       }
9993     return request
9995 def _BuildInputData(self, fn):
9996 """Build input data structures.
9999     self._ComputeClusterData()
10001     request = fn()
10002     request["type"] = self.mode
10003 self.in_data["request"] = request
10005 self.in_text = serializer.Dump(self.in_data)
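# Illustrative sketch (hypothetical values): for an allocation, the serialized
# request part of self.in_text looks roughly like
#
#   {"type": constants.IALLOCATOR_MODE_ALLOC, "name": "instance1.example.com",
#    "memory": 512, "disks": [{"size": 1024, "mode": "w"}],
#    "disk_space_total": <computed by _ComputeDiskSize>,
#    "required_nodes": 2, ...}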
10007 def Run(self, name, validate=True, call_fn=None):
10008 """Run an instance allocator and return the results.
10011 if call_fn is None:
10012 call_fn = self.rpc.call_iallocator_runner
10014 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10015 result.Raise("Failure while running the iallocator script")
10017     self.out_text = result.payload
10018     if validate:
10019       self._ValidateResult()
10021 def _ValidateResult(self):
10022 """Process the allocator results.
10024 This will process and if successful save the result in
10025 self.out_data and the other parameters.
10028     try:
10029       rdict = serializer.Load(self.out_text)
10030 except Exception, err:
10031 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10033 if not isinstance(rdict, dict):
10034 raise errors.OpExecError("Can't parse iallocator results: not a dict")
10036     # TODO: remove backwards compatibility in later versions
10037     if "nodes" in rdict and "result" not in rdict:
10038       rdict["result"] = rdict["nodes"]
10039       del rdict["nodes"]
10041 for key in "success", "info", "result":
10042 if key not in rdict:
10043 raise errors.OpExecError("Can't parse iallocator results:"
10044 " missing key '%s'" % key)
10045 setattr(self, key, rdict[key])
10047 if not isinstance(rdict["result"], list):
10048       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10049                                " is not a list")
10050     self.out_data = rdict
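# Illustrative sketch (hypothetical values): a well-formed reply accepted by
# _ValidateResult() looks like
#
#   {"success": True, "info": "allocation successful",
#    "result": ["node2.example.com", "node3.example.com"]}
#
# Replies that only carry the legacy "nodes" key are rewritten to "result"
# above before the mandatory-key checks run.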
10053 class LUTestAllocator(NoHooksLU):
10054 """Run allocator tests.
10056 This LU runs the allocator tests
10060 ("direction", _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10061 ("mode", _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10062 ("name", _TNonEmptyString),
10063 ("nics", _TOr(_TNone, _TListOf(
10064 _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10065 _TOr(_TNone, _TNonEmptyString))))),
10066 ("disks", _TOr(_TNone, _TList)),
10069 ("hypervisor", None),
10070 ("allocator", None),
10075 def CheckPrereq(self):
10076 """Check prerequisites.
10078     This checks the opcode parameters depending on the test direction and mode.
10081 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10082 for attr in ["mem_size", "disks", "disk_template",
10083 "os", "tags", "nics", "vcpus"]:
10084 if not hasattr(self.op, attr):
10085 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10086 attr, errors.ECODE_INVAL)
10087 iname = self.cfg.ExpandInstanceName(self.op.name)
10088 if iname is not None:
10089 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10090 iname, errors.ECODE_EXISTS)
10091 if not isinstance(self.op.nics, list):
10092 raise errors.OpPrereqError("Invalid parameter 'nics'",
10093 errors.ECODE_INVAL)
10094 if not isinstance(self.op.disks, list):
10095 raise errors.OpPrereqError("Invalid parameter 'disks'",
10096 errors.ECODE_INVAL)
10097 for row in self.op.disks:
10098 if (not isinstance(row, dict) or
10099 "size" not in row or
10100 not isinstance(row["size"], int) or
10101 "mode" not in row or
10102 row["mode"] not in ['r', 'w']):
10103 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10104 " parameter", errors.ECODE_INVAL)
10105 if self.op.hypervisor is None:
10106 self.op.hypervisor = self.cfg.GetHypervisorType()
10107 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10108 fname = _ExpandInstanceName(self.cfg, self.op.name)
10109 self.op.name = fname
10110 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10111 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10112 if not hasattr(self.op, "evac_nodes"):
10113 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10114 " opcode input", errors.ECODE_INVAL)
10115     else:
10116       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10117                                  self.op.mode, errors.ECODE_INVAL)
10119 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10120 if self.op.allocator is None:
10121 raise errors.OpPrereqError("Missing allocator name",
10122 errors.ECODE_INVAL)
10123 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10124 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10125 self.op.direction, errors.ECODE_INVAL)
10127 def Exec(self, feedback_fn):
10128 """Run the allocator test.
10131     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10132       ial = IAllocator(self.cfg, self.rpc,
10133                        mode=self.op.mode,
10134                        name=self.op.name,
10135                        mem_size=self.op.mem_size,
10136                        disks=self.op.disks,
10137                        disk_template=self.op.disk_template,
10138                        os=self.op.os,
10139                        tags=self.op.tags,
10140                        nics=self.op.nics,
10141                        vcpus=self.op.vcpus,
10142                        hypervisor=self.op.hypervisor,
10143                        )
10144     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10145       ial = IAllocator(self.cfg, self.rpc,
10146                        mode=self.op.mode,
10147                        name=self.op.name,
10148                        relocate_from=list(self.relocate_from),
10149                        )
10150     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10151       ial = IAllocator(self.cfg, self.rpc,
10152                        mode=self.op.mode,
10153                        evac_nodes=self.op.evac_nodes)
10154     else:
10155       raise errors.ProgrammerError("Uncaught mode %s in"
10156                                    " LUTestAllocator.Exec", self.op.mode)
10158     if self.op.direction == constants.IALLOCATOR_DIR_IN:
10159       result = ial.in_text
10160     else:
10161       ial.Run(self.op.allocator, validate=False)
10162       result = ial.out_text
10164     return result