4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
40 from ganeti import ssh
41 from ganeti import utils
42 from ganeti import errors
43 from ganeti import hypervisor
44 from ganeti import locking
45 from ganeti import constants
46 from ganeti import objects
47 from ganeti import serializer
48 from ganeti import ssconf
49 from ganeti import uidpool
50 from ganeti import compat
51 from ganeti import masterd
53 import ganeti.masterd.instance # pylint: disable-msg=W0611
56 # Modifiable default values; need to define these here before the
60 """Returns an empty list.
67 """Returns an empty dict.
75 """Checks if the given value is not None.
78 return val is not None
82 """Checks if the given value is None.
89 """Checks if the given value is a boolean.
92 return isinstance(val, bool)
96 """Checks if the given value is an integer.
99 return isinstance(val, int)
103 """Checks if the given value is a float.
106 return isinstance(val, float)
110 """Checks if the given value is a string.
113 return isinstance(val, basestring)
117 """Checks if a given value evaluates to a boolean True value.
123 def _TElemOf(target_list):
124 """Builds a function that checks if a given value is a member of a list.
127 return lambda val: val in target_list
132 """Checks if the given value is a list.
135 return isinstance(val, list)
139 """Checks if the given value is a dictionary.
142 return isinstance(val, dict)
147 """Combine multiple functions using an AND operation.
151 return compat.all(t(val) for t in args)
156 """Combine multiple functions using an AND operation.
160 return compat.any(t(val) for t in args)
167 _TNonEmptyString = _TAnd(_TString, _TTrue)
171 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
174 def _TListOf(my_type):
175 """Checks if a given value is a list with all elements of the same type.
179 lambda lst: compat.all(my_type(v) for v in lst))
182 def _TDictOf(key_type, val_type):
183 """Checks a dict type for the type of its key/values.
187 lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
188 and compat.all(val_type(v)
189 for v in my_dict.values())))
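# Illustrative sketch (not part of the original checks): the combinators
# above compose into argument checks for opcode parameters. For example, a
# hypothetical check for "a dict mapping non-empty strings to lists of
# non-negative integers" could be built and used as:
#
#   _TExampleMap = _TDictOf(_TNonEmptyString, _TListOf(_TPositiveInt))
#   _TExampleMap({"sda": [0, 128]})   # -> True
#   _TExampleMap({"": [0, 128]})      # -> False (empty key rejected)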
193 class LogicalUnit(object):
194 """Logical Unit base class.
196 Subclasses must follow these rules:
197 - implement ExpandNames
198 - implement CheckPrereq (except when tasklets are used)
199 - implement Exec (except when tasklets are used)
200 - implement BuildHooksEnv
201 - redefine HPATH and HTYPE
202 - optionally redefine their run requirements:
203 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
205 Note that all commands require root permissions.
207 @ivar dry_run_result: the value (if any) that will be returned to the caller
208 in dry-run mode (signalled by opcode dry_run parameter)
209 @cvar _OP_DEFS: a list of opcode attributes and the default values
210 they should get if not already existing
219 def __init__(self, processor, op, context, rpc):
220 """Constructor for LogicalUnit.
222 This needs to be overridden in derived classes in order to check op
226 self.proc = processor
228 self.cfg = context.cfg
229 self.context = context
231 # Dicts used to declare locking needs to mcpu
232 self.needed_locks = None
233 self.acquired_locks = {}
234 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
236 self.remove_locks = {}
237 # Used to force good behavior when calling helper functions
238 self.recalculate_locks = {}
241 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
242 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
243 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
244 # support for dry-run
245 self.dry_run_result = None
246 # support for generic debug attribute
247 if (not hasattr(self.op, "debug_level") or
248 not isinstance(self.op.debug_level, int)):
249 self.op.debug_level = 0
254 for aname, aval in self._OP_DEFS:
255 if not hasattr(self.op, aname):
260 setattr(self.op, aname, dval)
262 for attr_name, test in self._OP_REQP:
263 if not hasattr(op, attr_name):
264 raise errors.OpPrereqError("Required parameter '%s' missing" %
265 attr_name, errors.ECODE_INVAL)
266 attr_val = getattr(op, attr_name, None)
267 if not callable(test):
268 raise errors.ProgrammerError("Validation for parameter '%s' failed:"
269 " the given check is not callable (%s)" %
271 if not test(attr_val):
272 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
273 self.op.OP_ID, attr_name, type(attr_val), attr_val)
274 raise errors.OpPrereqError("Parameter '%s' has invalid type" %
275 attr_name, errors.ECODE_INVAL)
277 self.CheckArguments()
280 """Returns the SshRunner object
284 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
287 ssh = property(fget=__GetSSH)
289 def CheckArguments(self):
290 """Check syntactic validity for the opcode arguments.
292 This method is for doing a simple syntactic check and ensure
293 validity of opcode parameters, without any cluster-related
294 checks. While the same can be accomplished in ExpandNames and/or
295 CheckPrereq, doing these separately is better because:
297 - ExpandNames is left as purely a lock-related function
298 - CheckPrereq is run after we have acquired locks (and possible
301 The function is allowed to change the self.op attribute so that
302 later methods can no longer worry about missing parameters.
307 def ExpandNames(self):
308 """Expand names for this LU.
310 This method is called before starting to execute the opcode, and it should
311 update all the parameters of the opcode to their canonical form (e.g. a
312 short node name must be fully expanded after this method has successfully
313 completed). This way locking, hooks, logging, etc. can work correctly.
315 LUs which implement this method must also populate the self.needed_locks
316 member, as a dict with lock levels as keys, and a list of needed lock names
319 - use an empty dict if you don't need any lock
320 - if you don't need any lock at a particular level omit that level
321 - don't put anything for the BGL level
322 - if you want all locks at a level use locking.ALL_SET as a value
324 If you need to share locks (rather than acquire them exclusively) at one
325 level you can modify self.share_locks, setting a true value (usually 1) for
326 that level. By default locks are not shared.
328 This function can also define a list of tasklets, which then will be
329 executed in order instead of the usual LU-level CheckPrereq and Exec
330 functions, if those are not defined by the LU.
334 # Acquire all nodes and one instance
335 self.needed_locks = {
336 locking.LEVEL_NODE: locking.ALL_SET,
337 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
339 # Acquire just two nodes
340 self.needed_locks = {
341 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
344 self.needed_locks = {} # No, you can't leave it to the default value None
347 # The implementation of this method is mandatory only if the new LU is
348 # concurrent, so that old LUs don't need to be changed all at the same
351 self.needed_locks = {} # Exclusive LUs don't need locks.
353 raise NotImplementedError
355 def DeclareLocks(self, level):
356 """Declare LU locking needs for a level
358 While most LUs can just declare their locking needs at ExpandNames time,
359 sometimes there's the need to calculate some locks after having acquired
360 the ones before. This function is called just before acquiring locks at a
361 particular level, but after acquiring the ones at lower levels, and permits
362 such calculations. It can be used to modify self.needed_locks, and by
363 default it does nothing.
365 This function is only called if you have something already set in
366 self.needed_locks for the level.
368 @param level: Locking level which is going to be locked
369 @type level: member of ganeti.locking.LEVELS
373 def CheckPrereq(self):
374 """Check prerequisites for this LU.
376 This method should check that the prerequisites for the execution
377 of this LU are fulfilled. It can do internode communication, but
378 it should be idempotent - no cluster or system changes are
381 The method should raise errors.OpPrereqError in case something is
382 not fulfilled. Its return value is ignored.
384 This method should also update all the parameters of the opcode to
385 their canonical form if it hasn't been done by ExpandNames before.
388 if self.tasklets is not None:
389 for (idx, tl) in enumerate(self.tasklets):
390 logging.debug("Checking prerequisites for tasklet %s/%s",
391 idx + 1, len(self.tasklets))
396 def Exec(self, feedback_fn):
399 This method should implement the actual work. It should raise
400 errors.OpExecError for failures that are somewhat dealt with in
404 if self.tasklets is not None:
405 for (idx, tl) in enumerate(self.tasklets):
406 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
409 raise NotImplementedError
411 def BuildHooksEnv(self):
412 """Build hooks environment for this LU.
414 This method should return a three-element tuple consisting of: a dict
415 containing the environment that will be used for running the
416 specific hook for this LU, a list of node names on which the hook
417 should run before the execution, and a list of node names on which
418 the hook should run after the execution.
420 The keys of the dict must not have 'GANETI_' prefixed as this will
421 be handled in the hooks runner. Also note additional keys will be
422 added by the hooks runner. If the LU doesn't define any
423 environment, an empty dict (and not None) should be returned.
425 No nodes should be returned as an empty list (and not None).
427 Note that if the HPATH for a LU class is None, this function will
431 raise NotImplementedError
433 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
434 """Notify the LU about the results of its hooks.
436 This method is called every time a hooks phase is executed, and notifies
437 the Logical Unit about the hooks' result. The LU can then use it to alter
438 its result based on the hooks. By default the method does nothing and the
439 previous result is passed back unchanged but any LU can define it if it
440 wants to use the local cluster hook-scripts somehow.
442 @param phase: one of L{constants.HOOKS_PHASE_POST} or
443 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
444 @param hook_results: the results of the multi-node hooks rpc call
445 @param feedback_fn: function used to send feedback back to the caller
446 @param lu_result: the previous Exec result this LU had, or None
448 @return: the new Exec result, based on the previous result
452 # API must be kept, thus we ignore the unused-argument and
453 # could-be-a-function warnings
454 # pylint: disable-msg=W0613,R0201
457 def _ExpandAndLockInstance(self):
458 """Helper function to expand and lock an instance.
460 Many LUs that work on an instance take its name in self.op.instance_name
461 and need to expand it and then declare the expanded name for locking. This
462 function does it, and then updates self.op.instance_name to the expanded
463 name. It also initializes needed_locks as a dict, if this hasn't been done
467 if self.needed_locks is None:
468 self.needed_locks = {}
470 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
471 "_ExpandAndLockInstance called with instance-level locks set"
472 self.op.instance_name = _ExpandInstanceName(self.cfg,
473 self.op.instance_name)
474 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
476 def _LockInstancesNodes(self, primary_only=False):
477 """Helper function to declare instances' nodes for locking.
479 This function should be called after locking one or more instances to lock
480 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
481 with all primary or secondary nodes for instances already locked and
482 present in self.needed_locks[locking.LEVEL_INSTANCE].
484 It should be called from DeclareLocks, and for safety only works if
485 self.recalculate_locks[locking.LEVEL_NODE] is set.
487 In the future it may grow parameters to just lock some instance's nodes, or
488 to just lock primaries or secondary nodes, if needed.
490 It should be called in DeclareLocks in a way similar to::
492 if level == locking.LEVEL_NODE:
493 self._LockInstancesNodes()
495 @type primary_only: boolean
496 @param primary_only: only lock primary nodes of locked instances
499 assert locking.LEVEL_NODE in self.recalculate_locks, \
500 "_LockInstancesNodes helper function called with no nodes to recalculate"
502 # TODO: check if we've really been called with the instance locks held
504 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
505 # future we might want to have different behaviors depending on the value
506 # of self.recalculate_locks[locking.LEVEL_NODE]
508 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
509 instance = self.context.cfg.GetInstanceInfo(instance_name)
510 wanted_nodes.append(instance.primary_node)
512 wanted_nodes.extend(instance.secondary_nodes)
514 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
515 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
516 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
517 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
519 del self.recalculate_locks[locking.LEVEL_NODE]
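# Illustrative sketch (hypothetical LU, not from this class): an
# instance-level LU typically pairs this helper with ExpandNames like so:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()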
522 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
523 """Simple LU which runs no hooks.
525 This LU is intended as a parent for other LogicalUnits which will
526 run no hooks, in order to reduce duplicate code.
532 def BuildHooksEnv(self):
533 """Empty BuildHooksEnv for NoHooksLu.
535 This just raises an error.
538 assert False, "BuildHooksEnv called for NoHooksLUs"
542 """Tasklet base class.
544 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
545 they can mix legacy code with tasklets. Locking needs to be done in the LU,
546 tasklets know nothing about locks.
548 Subclasses must follow these rules:
549 - Implement CheckPrereq
553 def __init__(self, lu):
560 def CheckPrereq(self):
561 """Check prerequisites for this tasklets.
563 This method should check whether the prerequisites for the execution of
564 this tasklet are fulfilled. It can do internode communication, but it
565 should be idempotent - no cluster or system changes are allowed.
567 The method should raise errors.OpPrereqError in case something is not
568 fulfilled. Its return value is ignored.
570 This method should also update all parameters to their canonical form if it
571 hasn't been done before.
576 def Exec(self, feedback_fn):
577 """Execute the tasklet.
579 This method should implement the actual work. It should raise
580 errors.OpExecError for failures that are somewhat dealt with in code, or
584 raise NotImplementedError
587 def _GetWantedNodes(lu, nodes):
588 """Returns list of checked and expanded node names.
590 @type lu: L{LogicalUnit}
591 @param lu: the logical unit on whose behalf we execute
593 @param nodes: list of node names or None for all nodes
595 @return: the list of nodes, sorted
596 @raise errors.ProgrammerError: if the nodes parameter is wrong type
600 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
601 " non-empty list of nodes whose name is to be expanded.")
603 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
604 return utils.NiceSort(wanted)
607 def _GetWantedInstances(lu, instances):
608 """Returns list of checked and expanded instance names.
610 @type lu: L{LogicalUnit}
611 @param lu: the logical unit on whose behalf we execute
612 @type instances: list
613 @param instances: list of instance names or None for all instances
615 @return: the list of instances, sorted
616 @raise errors.OpPrereqError: if the instances parameter is wrong type
617 @raise errors.OpPrereqError: if any of the passed instances is not found
621 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
623 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
627 def _GetUpdatedParams(old_params, update_dict,
628 use_default=True, use_none=False):
629 """Return the new version of a parameter dictionary.
631 @type old_params: dict
632 @param old_params: old parameters
633 @type update_dict: dict
634 @param update_dict: dict containing new parameter values, or
635 constants.VALUE_DEFAULT to reset the parameter to its default
637 @type use_default: boolean
638 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
639 values as 'to be deleted' values
640 @type use_none: boolean
641 @param use_none: whether to recognise C{None} values as 'to be
644 @return: the new parameter dictionary
647 params_copy = copy.deepcopy(old_params)
648 for key, val in update_dict.iteritems():
649 if ((use_default and val == constants.VALUE_DEFAULT) or
650 (use_none and val is None)):
656 params_copy[key] = val
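# Illustrative example (behaviour sketch, values are hypothetical): merging
# update_dict={"vcpus": 2, "memory": constants.VALUE_DEFAULT} into
# old_params={"vcpus": 1, "memory": 128} with use_default=True yields
# {"vcpus": 2}: "vcpus" is overridden, "memory" is dropped so the cluster
# default applies again, and keys absent from update_dict are kept as-is.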
660 def _CheckOutputFields(static, dynamic, selected):
661 """Checks whether all selected fields are valid.
663 @type static: L{utils.FieldSet}
664 @param static: static fields set
665 @type dynamic: L{utils.FieldSet}
666 @param dynamic: dynamic fields set
673 delta = f.NonMatching(selected)
675 raise errors.OpPrereqError("Unknown output fields selected: %s"
676 % ",".join(delta), errors.ECODE_INVAL)
679 def _CheckBooleanOpField(op, name):
680 """Validates boolean opcode parameters.
682 This will ensure that an opcode parameter is either a boolean value,
683 or None (but that it always exists).
686 val = getattr(op, name, None)
687 if not (val is None or isinstance(val, bool)):
688 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
689 (name, str(val)), errors.ECODE_INVAL)
690 setattr(op, name, val)
693 def _CheckGlobalHvParams(params):
694 """Validates that given hypervisor params are not global ones.
696 This will ensure that instances don't get customised versions of
700 used_globals = constants.HVC_GLOBALS.intersection(params)
702 msg = ("The following hypervisor parameters are global and cannot"
703 " be customized at instance level, please modify them at"
704 " cluster level: %s" % utils.CommaJoin(used_globals))
705 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
708 def _CheckNodeOnline(lu, node):
709 """Ensure that a given node is online.
711 @param lu: the LU on behalf of which we make the check
712 @param node: the node to check
713 @raise errors.OpPrereqError: if the node is offline
716 if lu.cfg.GetNodeInfo(node).offline:
717 raise errors.OpPrereqError("Can't use offline node %s" % node,
721 def _CheckNodeNotDrained(lu, node):
722 """Ensure that a given node is not drained.
724 @param lu: the LU on behalf of which we make the check
725 @param node: the node to check
726 @raise errors.OpPrereqError: if the node is drained
729 if lu.cfg.GetNodeInfo(node).drained:
730 raise errors.OpPrereqError("Can't use drained node %s" % node,
734 def _CheckNodeHasOS(lu, node, os_name, force_variant):
735 """Ensure that a node supports a given OS.
737 @param lu: the LU on behalf of which we make the check
738 @param node: the node to check
739 @param os_name: the OS to query about
740 @param force_variant: whether to ignore variant errors
741 @raise errors.OpPrereqError: if the node is not supporting the OS
744 result = lu.rpc.call_os_get(node, os_name)
745 result.Raise("OS '%s' not in supported OS list for node %s" %
747 prereq=True, ecode=errors.ECODE_INVAL)
748 if not force_variant:
749 _CheckOSVariant(result.payload, os_name)
752 def _RequireFileStorage():
753 """Checks that file storage is enabled.
755 @raise errors.OpPrereqError: when file storage is disabled
758 if not constants.ENABLE_FILE_STORAGE:
759 raise errors.OpPrereqError("File storage disabled at configure time",
763 def _CheckDiskTemplate(template):
764 """Ensure a given disk template is valid.
767 if template not in constants.DISK_TEMPLATES:
768 msg = ("Invalid disk template name '%s', valid templates are: %s" %
769 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
770 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
771 if template == constants.DT_FILE:
772 _RequireFileStorage()
775 def _CheckStorageType(storage_type):
776 """Ensure a given storage type is valid.
779 if storage_type not in constants.VALID_STORAGE_TYPES:
780 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
782 if storage_type == constants.ST_FILE:
783 _RequireFileStorage()
787 def _GetClusterDomainSecret():
788 """Reads the cluster domain secret.
791 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
795 def _CheckInstanceDown(lu, instance, reason):
796 """Ensure that an instance is not running."""
797 if instance.admin_up:
798 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
799 (instance.name, reason), errors.ECODE_STATE)
801 pnode = instance.primary_node
802 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
803 ins_l.Raise("Can't contact node %s for instance information" % pnode,
804 prereq=True, ecode=errors.ECODE_ENVIRON)
806 if instance.name in ins_l.payload:
807 raise errors.OpPrereqError("Instance %s is running, %s" %
808 (instance.name, reason), errors.ECODE_STATE)
811 def _ExpandItemName(fn, name, kind):
812 """Expand an item name.
814 @param fn: the function to use for expansion
815 @param name: requested item name
816 @param kind: text description ('Node' or 'Instance')
817 @return: the resolved (full) name
818 @raise errors.OpPrereqError: if the item is not found
822 if full_name is None:
823 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
828 def _ExpandNodeName(cfg, name):
829 """Wrapper over L{_ExpandItemName} for nodes."""
830 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
833 def _ExpandInstanceName(cfg, name):
834 """Wrapper over L{_ExpandItemName} for instance."""
835 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
838 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
839 memory, vcpus, nics, disk_template, disks,
840 bep, hvp, hypervisor_name):
841 """Builds instance related env variables for hooks
843 This builds the hook environment from individual variables.
846 @param name: the name of the instance
847 @type primary_node: string
848 @param primary_node: the name of the instance's primary node
849 @type secondary_nodes: list
850 @param secondary_nodes: list of secondary nodes as strings
851 @type os_type: string
852 @param os_type: the name of the instance's OS
853 @type status: boolean
854 @param status: the should_run status of the instance
856 @param memory: the memory size of the instance
858 @param vcpus: the count of VCPUs the instance has
860 @param nics: list of tuples (ip, mac, mode, link) representing
861 the NICs the instance has
862 @type disk_template: string
863 @param disk_template: the disk template of the instance
865 @param disks: the list of (size, mode) pairs
867 @param bep: the backend parameters for the instance
869 @param hvp: the hypervisor parameters for the instance
870 @type hypervisor_name: string
871 @param hypervisor_name: the hypervisor for the instance
873 @return: the hook environment for this instance
882 "INSTANCE_NAME": name,
883 "INSTANCE_PRIMARY": primary_node,
884 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
885 "INSTANCE_OS_TYPE": os_type,
886 "INSTANCE_STATUS": str_status,
887 "INSTANCE_MEMORY": memory,
888 "INSTANCE_VCPUS": vcpus,
889 "INSTANCE_DISK_TEMPLATE": disk_template,
890 "INSTANCE_HYPERVISOR": hypervisor_name,
894 nic_count = len(nics)
895 for idx, (ip, mac, mode, link) in enumerate(nics):
898 env["INSTANCE_NIC%d_IP" % idx] = ip
899 env["INSTANCE_NIC%d_MAC" % idx] = mac
900 env["INSTANCE_NIC%d_MODE" % idx] = mode
901 env["INSTANCE_NIC%d_LINK" % idx] = link
902 if mode == constants.NIC_MODE_BRIDGED:
903 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
907 env["INSTANCE_NIC_COUNT"] = nic_count
910 disk_count = len(disks)
911 for idx, (size, mode) in enumerate(disks):
912 env["INSTANCE_DISK%d_SIZE" % idx] = size
913 env["INSTANCE_DISK%d_MODE" % idx] = mode
917 env["INSTANCE_DISK_COUNT"] = disk_count
919 for source, kind in [(bep, "BE"), (hvp, "HV")]:
920 for key, value in source.items():
921 env["INSTANCE_%s_%s" % (kind, key)] = value
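# Illustrative example (hypothetical values): for a single-NIC, single-disk
# instance the resulting environment contains, among others, INSTANCE_NAME,
# INSTANCE_PRIMARY, INSTANCE_NIC_COUNT=1, INSTANCE_NIC0_MAC,
# INSTANCE_NIC0_MODE, INSTANCE_DISK_COUNT=1 and INSTANCE_DISK0_SIZE; the
# hooks runner later prefixes every key with GANETI_.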
926 def _NICListToTuple(lu, nics):
927 """Build a list of nic information tuples.
929 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
930 value in LUQueryInstanceData.
932 @type lu: L{LogicalUnit}
933 @param lu: the logical unit on whose behalf we execute
934 @type nics: list of L{objects.NIC}
935 @param nics: list of nics to convert to hooks tuples
939 cluster = lu.cfg.GetClusterInfo()
943 filled_params = cluster.SimpleFillNIC(nic.nicparams)
944 mode = filled_params[constants.NIC_MODE]
945 link = filled_params[constants.NIC_LINK]
946 hooks_nics.append((ip, mac, mode, link))
950 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
951 """Builds instance related env variables for hooks from an object.
953 @type lu: L{LogicalUnit}
954 @param lu: the logical unit on whose behalf we execute
955 @type instance: L{objects.Instance}
956 @param instance: the instance for which we should build the
959 @param override: dictionary with key/values that will override
962 @return: the hook environment dictionary
965 cluster = lu.cfg.GetClusterInfo()
966 bep = cluster.FillBE(instance)
967 hvp = cluster.FillHV(instance)
969 'name': instance.name,
970 'primary_node': instance.primary_node,
971 'secondary_nodes': instance.secondary_nodes,
972 'os_type': instance.os,
973 'status': instance.admin_up,
974 'memory': bep[constants.BE_MEMORY],
975 'vcpus': bep[constants.BE_VCPUS],
976 'nics': _NICListToTuple(lu, instance.nics),
977 'disk_template': instance.disk_template,
978 'disks': [(disk.size, disk.mode) for disk in instance.disks],
981 'hypervisor_name': instance.hypervisor,
984 args.update(override)
985 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
988 def _AdjustCandidatePool(lu, exceptions):
989 """Adjust the candidate pool after node operations.
992 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
994 lu.LogInfo("Promoted nodes to master candidate role: %s",
995 utils.CommaJoin(node.name for node in mod_list))
996 for name in mod_list:
997 lu.context.ReaddNode(name)
998 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1000 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1004 def _DecideSelfPromotion(lu, exceptions=None):
1005 """Decide whether I should promote myself as a master candidate.
1008 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1009 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1010 # the new node will increase mc_max with one, so:
1011 mc_should = min(mc_should + 1, cp_size)
1012 return mc_now < mc_should
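# Worked example (illustrative numbers): with candidate_pool_size=10,
# mc_now=4 and mc_should=4, the wanted count becomes min(4 + 1, 10) = 5,
# so 4 < 5 and the new node promotes itself; once there are already
# cp_size candidates the comparison is false and no promotion happens.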
1015 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1016 """Check that the brigdes needed by a list of nics exist.
1019 cluster = lu.cfg.GetClusterInfo()
1020 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1021 brlist = [params[constants.NIC_LINK] for params in paramslist
1022 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1024 result = lu.rpc.call_bridges_exist(target_node, brlist)
1025 result.Raise("Error checking bridges on destination node '%s'" %
1026 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1029 def _CheckInstanceBridgesExist(lu, instance, node=None):
1030 """Check that the brigdes needed by an instance exist.
1034 node = instance.primary_node
1035 _CheckNicsBridgesExist(lu, instance.nics, node)
1038 def _CheckOSVariant(os_obj, name):
1039 """Check whether an OS name conforms to the os variants specification.
1041 @type os_obj: L{objects.OS}
1042 @param os_obj: OS object to check
1044 @param name: OS name passed by the user, to check for validity
1047 if not os_obj.supported_variants:
1050 variant = name.split("+", 1)[1]
1052 raise errors.OpPrereqError("OS name must include a variant",
1055 if variant not in os_obj.supported_variants:
1056 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
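# Illustrative example (hypothetical OS name): for an OS that declares
# supported_variants, a user-supplied name such as "debian-image+default"
# is split on the first '+' and "default" must be one of the supported
# variants; a bare "debian-image" without a '+' raises "OS name must
# include a variant".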
1059 def _GetNodeInstancesInner(cfg, fn):
1060 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1063 def _GetNodeInstances(cfg, node_name):
1064 """Returns a list of all primary and secondary instances on a node.
1068 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1071 def _GetNodePrimaryInstances(cfg, node_name):
1072 """Returns primary instances on a node.
1075 return _GetNodeInstancesInner(cfg,
1076 lambda inst: node_name == inst.primary_node)
1079 def _GetNodeSecondaryInstances(cfg, node_name):
1080 """Returns secondary instances on a node.
1083 return _GetNodeInstancesInner(cfg,
1084 lambda inst: node_name in inst.secondary_nodes)
1087 def _GetStorageTypeArgs(cfg, storage_type):
1088 """Returns the arguments for a storage type.
1091 # Special case for file storage
1092 if storage_type == constants.ST_FILE:
1093 # storage.FileStorage wants a list of storage directories
1094 return [[cfg.GetFileStorageDir()]]
1099 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1102 for dev in instance.disks:
1103 cfg.SetDiskID(dev, node_name)
1105 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1106 result.Raise("Failed to get disk status from node %s" % node_name,
1107 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1109 for idx, bdev_status in enumerate(result.payload):
1110 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1116 class LUPostInitCluster(LogicalUnit):
1117 """Logical unit for running hooks after cluster initialization.
1120 HPATH = "cluster-init"
1121 HTYPE = constants.HTYPE_CLUSTER
1124 def BuildHooksEnv(self):
1128 env = {"OP_TARGET": self.cfg.GetClusterName()}
1129 mn = self.cfg.GetMasterNode()
1130 return env, [], [mn]
1132 def Exec(self, feedback_fn):
1139 class LUDestroyCluster(LogicalUnit):
1140 """Logical unit for destroying the cluster.
1143 HPATH = "cluster-destroy"
1144 HTYPE = constants.HTYPE_CLUSTER
1147 def BuildHooksEnv(self):
1151 env = {"OP_TARGET": self.cfg.GetClusterName()}
1154 def CheckPrereq(self):
1155 """Check prerequisites.
1157 This checks whether the cluster is empty.
1159 Any errors are signaled by raising errors.OpPrereqError.
1162 master = self.cfg.GetMasterNode()
1164 nodelist = self.cfg.GetNodeList()
1165 if len(nodelist) != 1 or nodelist[0] != master:
1166 raise errors.OpPrereqError("There are still %d node(s) in"
1167 " this cluster." % (len(nodelist) - 1),
1169 instancelist = self.cfg.GetInstanceList()
1171 raise errors.OpPrereqError("There are still %d instance(s) in"
1172 " this cluster." % len(instancelist),
1175 def Exec(self, feedback_fn):
1176 """Destroys the cluster.
1179 master = self.cfg.GetMasterNode()
1180 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1182 # Run post hooks on master node before it's removed
1183 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1185 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1187 # pylint: disable-msg=W0702
1188 self.LogWarning("Errors occurred running hooks on %s" % master)
1190 result = self.rpc.call_node_stop_master(master, False)
1191 result.Raise("Could not disable the master role")
1193 if modify_ssh_setup:
1194 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1195 utils.CreateBackup(priv_key)
1196 utils.CreateBackup(pub_key)
1201 def _VerifyCertificate(filename):
1202 """Verifies a certificate for LUVerifyCluster.
1204 @type filename: string
1205 @param filename: Path to PEM file
1209 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1210 utils.ReadFile(filename))
1211 except Exception, err: # pylint: disable-msg=W0703
1212 return (LUVerifyCluster.ETYPE_ERROR,
1213 "Failed to load X509 certificate %s: %s" % (filename, err))
1216 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1217 constants.SSL_CERT_EXPIRATION_ERROR)
1220 fnamemsg = "While verifying %s: %s" % (filename, msg)
1225 return (None, fnamemsg)
1226 elif errcode == utils.CERT_WARNING:
1227 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1228 elif errcode == utils.CERT_ERROR:
1229 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1231 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1234 class LUVerifyCluster(LogicalUnit):
1235 """Verifies the cluster status.
1238 HPATH = "cluster-verify"
1239 HTYPE = constants.HTYPE_CLUSTER
1241 ("skip_checks", _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1242 ("verbose", _TBool),
1243 ("error_codes", _TBool),
1244 ("debug_simulate_errors", _TBool),
1248 TCLUSTER = "cluster"
1250 TINSTANCE = "instance"
1252 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1253 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1254 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1255 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1256 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1257 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1259 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1260 ENODEDRBD = (TNODE, "ENODEDRBD")
1261 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1262 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1263 ENODEHV = (TNODE, "ENODEHV")
1264 ENODELVM = (TNODE, "ENODELVM")
1265 ENODEN1 = (TNODE, "ENODEN1")
1266 ENODENET = (TNODE, "ENODENET")
1267 ENODEOS = (TNODE, "ENODEOS")
1268 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1269 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1270 ENODERPC = (TNODE, "ENODERPC")
1271 ENODESSH = (TNODE, "ENODESSH")
1272 ENODEVERSION = (TNODE, "ENODEVERSION")
1273 ENODESETUP = (TNODE, "ENODESETUP")
1274 ENODETIME = (TNODE, "ENODETIME")
1276 ETYPE_FIELD = "code"
1277 ETYPE_ERROR = "ERROR"
1278 ETYPE_WARNING = "WARNING"
1280 class NodeImage(object):
1281 """A class representing the logical and physical status of a node.
1284 @ivar name: the node name to which this object refers
1285 @ivar volumes: a structure as returned from
1286 L{ganeti.backend.GetVolumeList} (runtime)
1287 @ivar instances: a list of running instances (runtime)
1288 @ivar pinst: list of configured primary instances (config)
1289 @ivar sinst: list of configured secondary instances (config)
1290 @ivar sbp: dict of {secondary-node: list of instances} of all peers
1291 of this node (config)
1292 @ivar mfree: free memory, as reported by hypervisor (runtime)
1293 @ivar dfree: free disk, as reported by the node (runtime)
1294 @ivar offline: the offline status (config)
1295 @type rpc_fail: boolean
1296 @ivar rpc_fail: whether the RPC verify call failed (overall,
1297 not whether the individual keys were correct) (runtime)
1298 @type lvm_fail: boolean
1299 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1300 @type hyp_fail: boolean
1301 @ivar hyp_fail: whether the RPC call didn't return the instance list
1302 @type ghost: boolean
1303 @ivar ghost: whether this is a known node or not (config)
1304 @type os_fail: boolean
1305 @ivar os_fail: whether the RPC call didn't return valid OS data
1307 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1310 def __init__(self, offline=False, name=None):
1319 self.offline = offline
1320 self.rpc_fail = False
1321 self.lvm_fail = False
1322 self.hyp_fail = False
1324 self.os_fail = False
1327 def ExpandNames(self):
1328 self.needed_locks = {
1329 locking.LEVEL_NODE: locking.ALL_SET,
1330 locking.LEVEL_INSTANCE: locking.ALL_SET,
1332 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1334 def _Error(self, ecode, item, msg, *args, **kwargs):
1335 """Format an error message.
1337 Based on the opcode's error_codes parameter, either format a
1338 parseable error code, or a simpler error string.
1340 This must be called only from Exec and functions called from Exec.
1343 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1345 # first complete the msg
1348 # then format the whole message
1349 if self.op.error_codes:
1350 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1356 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1357 # and finally report it via the feedback_fn
1358 self._feedback_fn(" - %s" % msg)
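# Illustrative example (hypothetical node name): with error_codes enabled
# the line passed to feedback_fn looks like
#   - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
# while the simpler form without error_codes reads
#   - ERROR: node node1.example.com: unable to check volume groups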
1360 def _ErrorIf(self, cond, *args, **kwargs):
1361 """Log an error message if the passed condition is True.
1364 cond = bool(cond) or self.op.debug_simulate_errors
1366 self._Error(*args, **kwargs)
1367 # only mark the operation as failed for ERROR cases, not for warnings
1368 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1369 self.bad = self.bad or cond
1371 def _VerifyNode(self, ninfo, nresult):
1372 """Run multiple tests against a node.
1376 - compares ganeti version
1377 - checks vg existence and size > 20G
1378 - checks config file checksum
1379 - checks ssh to other nodes
1381 @type ninfo: L{objects.Node}
1382 @param ninfo: the node to check
1383 @param nresult: the results from the node
1385 @return: whether overall this call was successful (and we can expect
1386 reasonable values in the response)
1390 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1392 # main result, nresult should be a non-empty dict
1393 test = not nresult or not isinstance(nresult, dict)
1394 _ErrorIf(test, self.ENODERPC, node,
1395 "unable to verify node: no data returned")
1399 # compares ganeti version
1400 local_version = constants.PROTOCOL_VERSION
1401 remote_version = nresult.get("version", None)
1402 test = not (remote_version and
1403 isinstance(remote_version, (list, tuple)) and
1404 len(remote_version) == 2)
1405 _ErrorIf(test, self.ENODERPC, node,
1406 "connection to node returned invalid data")
1410 test = local_version != remote_version[0]
1411 _ErrorIf(test, self.ENODEVERSION, node,
1412 "incompatible protocol versions: master %s,"
1413 " node %s", local_version, remote_version[0])
1417 # node seems compatible, we can actually try to look into its results
1419 # full package version
1420 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1421 self.ENODEVERSION, node,
1422 "software version mismatch: master %s, node %s",
1423 constants.RELEASE_VERSION, remote_version[1],
1424 code=self.ETYPE_WARNING)
1426 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1427 if isinstance(hyp_result, dict):
1428 for hv_name, hv_result in hyp_result.iteritems():
1429 test = hv_result is not None
1430 _ErrorIf(test, self.ENODEHV, node,
1431 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1434 test = nresult.get(constants.NV_NODESETUP,
1435 ["Missing NODESETUP results"])
1436 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1441 def _VerifyNodeTime(self, ninfo, nresult,
1442 nvinfo_starttime, nvinfo_endtime):
1443 """Check the node time.
1445 @type ninfo: L{objects.Node}
1446 @param ninfo: the node to check
1447 @param nresult: the remote results for the node
1448 @param nvinfo_starttime: the start time of the RPC call
1449 @param nvinfo_endtime: the end time of the RPC call
1453 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1455 ntime = nresult.get(constants.NV_TIME, None)
1457 ntime_merged = utils.MergeTime(ntime)
1458 except (ValueError, TypeError):
1459 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1462 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1463 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1464 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1465 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1469 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1470 "Node time diverges by at least %s from master node time",
1473 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1474 """Check the node time.
1476 @type ninfo: L{objects.Node}
1477 @param ninfo: the node to check
1478 @param nresult: the remote results for the node
1479 @param vg_name: the configured VG name
1486 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1488 # checks vg existence and size > 20G
1489 vglist = nresult.get(constants.NV_VGLIST, None)
1491 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1493 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1494 constants.MIN_VG_SIZE)
1495 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1498 pvlist = nresult.get(constants.NV_PVLIST, None)
1499 test = pvlist is None
1500 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1502 # check that ':' is not present in PV names, since it's a
1503 # special character for lvcreate (denotes the range of PEs to
1505 for _, pvname, owner_vg in pvlist:
1506 test = ":" in pvname
1507 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1508 " '%s' of VG '%s'", pvname, owner_vg)
1510 def _VerifyNodeNetwork(self, ninfo, nresult):
1511 """Check the node time.
1513 @type ninfo: L{objects.Node}
1514 @param ninfo: the node to check
1515 @param nresult: the remote results for the node
1519 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1521 test = constants.NV_NODELIST not in nresult
1522 _ErrorIf(test, self.ENODESSH, node,
1523 "node hasn't returned node ssh connectivity data")
1525 if nresult[constants.NV_NODELIST]:
1526 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1527 _ErrorIf(True, self.ENODESSH, node,
1528 "ssh communication with node '%s': %s", a_node, a_msg)
1530 test = constants.NV_NODENETTEST not in nresult
1531 _ErrorIf(test, self.ENODENET, node,
1532 "node hasn't returned node tcp connectivity data")
1534 if nresult[constants.NV_NODENETTEST]:
1535 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1537 _ErrorIf(True, self.ENODENET, node,
1538 "tcp communication with node '%s': %s",
1539 anode, nresult[constants.NV_NODENETTEST][anode])
1541 test = constants.NV_MASTERIP not in nresult
1542 _ErrorIf(test, self.ENODENET, node,
1543 "node hasn't returned node master IP reachability data")
1545 if not nresult[constants.NV_MASTERIP]:
1546 if node == self.master_node:
1547 msg = "the master node cannot reach the master IP (not configured?)"
1549 msg = "cannot reach the master IP"
1550 _ErrorIf(True, self.ENODENET, node, msg)
1553 def _VerifyInstance(self, instance, instanceconfig, node_image):
1554 """Verify an instance.
1556 This function checks to see if the required block devices are
1557 available on the instance's node.
1560 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1561 node_current = instanceconfig.primary_node
1563 node_vol_should = {}
1564 instanceconfig.MapLVsByNode(node_vol_should)
1566 for node in node_vol_should:
1567 n_img = node_image[node]
1568 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1569 # ignore missing volumes on offline or broken nodes
1571 for volume in node_vol_should[node]:
1572 test = volume not in n_img.volumes
1573 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1574 "volume %s missing on node %s", volume, node)
1576 if instanceconfig.admin_up:
1577 pri_img = node_image[node_current]
1578 test = instance not in pri_img.instances and not pri_img.offline
1579 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1580 "instance not running on its primary node %s",
1583 for node, n_img in node_image.items():
1584 if node != node_current:
1585 test = instance in n_img.instances
1586 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1587 "instance should not run on node %s", node)
1589 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1590 """Verify if there are any unknown volumes in the cluster.
1592 The .os, .swap and backup volumes are ignored. All other volumes are
1593 reported as unknown.
1596 for node, n_img in node_image.items():
1597 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1598 # skip non-healthy nodes
1600 for volume in n_img.volumes:
1601 test = (node not in node_vol_should or
1602 volume not in node_vol_should[node])
1603 self._ErrorIf(test, self.ENODEORPHANLV, node,
1604 "volume %s is unknown", volume)
1606 def _VerifyOrphanInstances(self, instancelist, node_image):
1607 """Verify the list of running instances.
1609 This checks what instances are running but unknown to the cluster.
1612 for node, n_img in node_image.items():
1613 for o_inst in n_img.instances:
1614 test = o_inst not in instancelist
1615 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1616 "instance %s on node %s should not exist", o_inst, node)
1618 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1619 """Verify N+1 Memory Resilience.
1621 Check that if one single node dies we can still start all the
1622 instances it was primary for.
1625 for node, n_img in node_image.items():
1626 # This code checks that every node which is now listed as
1627 # secondary has enough memory to host all instances it is
1628 # supposed to, should a single other node in the cluster fail.
1629 # FIXME: not ready for failover to an arbitrary node
1630 # FIXME: does not support file-backed instances
1631 # WARNING: we currently take into account down instances as well
1632 # as up ones, considering that even if they're down someone
1633 # might want to start them even in the event of a node failure.
1634 for prinode, instances in n_img.sbp.items():
1636 for instance in instances:
1637 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1638 if bep[constants.BE_AUTO_BALANCE]:
1639 needed_mem += bep[constants.BE_MEMORY]
1640 test = n_img.mfree < needed_mem
1641 self._ErrorIf(test, self.ENODEN1, node,
1642 "not enough memory on to accommodate"
1643 " failovers should peer node %s fail", prinode)
1645 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1647 """Verifies and computes the node required file checksums.
1649 @type ninfo: L{objects.Node}
1650 @param ninfo: the node to check
1651 @param nresult: the remote results for the node
1652 @param file_list: required list of files
1653 @param local_cksum: dictionary of local files and their checksums
1654 @param master_files: list of files that only masters should have
1658 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1660 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1661 test = not isinstance(remote_cksum, dict)
1662 _ErrorIf(test, self.ENODEFILECHECK, node,
1663 "node hasn't returned file checksum data")
1667 for file_name in file_list:
1668 node_is_mc = ninfo.master_candidate
1669 must_have = (file_name not in master_files) or node_is_mc
1671 test1 = file_name not in remote_cksum
1673 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1675 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1676 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1677 "file '%s' missing", file_name)
1678 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1679 "file '%s' has wrong checksum", file_name)
1680 # not candidate and this is not a must-have file
1681 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1682 "file '%s' should not exist on non master"
1683 " candidates (and the file is outdated)", file_name)
1684 # all good, except non-master/non-must have combination
1685 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1686 "file '%s' should not exist"
1687 " on non master candidates", file_name)
1689 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1690 """Verifies and the node DRBD status.
1692 @type ninfo: L{objects.Node}
1693 @param ninfo: the node to check
1694 @param nresult: the remote results for the node
1695 @param instanceinfo: the dict of instances
1696 @param drbd_map: the DRBD map as returned by
1697 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1701 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1703 # compute the DRBD minors
1705 for minor, instance in drbd_map[node].items():
1706 test = instance not in instanceinfo
1707 _ErrorIf(test, self.ECLUSTERCFG, None,
1708 "ghost instance '%s' in temporary DRBD map", instance)
1709 # ghost instance should not be running, but otherwise we
1710 # don't give double warnings (both ghost instance and
1711 # unallocated minor in use)
1713 node_drbd[minor] = (instance, False)
1715 instance = instanceinfo[instance]
1716 node_drbd[minor] = (instance.name, instance.admin_up)
1718 # and now check them
1719 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1720 test = not isinstance(used_minors, (tuple, list))
1721 _ErrorIf(test, self.ENODEDRBD, node,
1722 "cannot parse drbd status file: %s", str(used_minors))
1724 # we cannot check drbd status
1727 for minor, (iname, must_exist) in node_drbd.items():
1728 test = minor not in used_minors and must_exist
1729 _ErrorIf(test, self.ENODEDRBD, node,
1730 "drbd minor %d of instance %s is not active", minor, iname)
1731 for minor in used_minors:
1732 test = minor not in node_drbd
1733 _ErrorIf(test, self.ENODEDRBD, node,
1734 "unallocated drbd minor %d is in use", minor)
1736 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1737 """Builds the node OS structures.
1739 @type ninfo: L{objects.Node}
1740 @param ninfo: the node to check
1741 @param nresult: the remote results for the node
1742 @param nimg: the node image object
1746 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1748 remote_os = nresult.get(constants.NV_OSLIST, None)
1749 test = (not isinstance(remote_os, list) or
1750 not compat.all(isinstance(v, list) and len(v) == 7
1751 for v in remote_os))
1753 _ErrorIf(test, self.ENODEOS, node,
1754 "node hasn't returned valid OS data")
1763 for (name, os_path, status, diagnose,
1764 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1766 if name not in os_dict:
1769 # parameters is a list of lists instead of list of tuples due to
1770 # JSON lacking a real tuple type, fix it:
1771 parameters = [tuple(v) for v in parameters]
1772 os_dict[name].append((os_path, status, diagnose,
1773 set(variants), set(parameters), set(api_ver)))
1775 nimg.oslist = os_dict
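# Illustrative example (hypothetical data): after this call nimg.oslist maps
# each OS name to a list of per-path 6-tuples, e.g.
#   {"debian-image": [("/srv/ganeti/os/debian-image", True, "",
#                      set(["default"]), set(), set([10, 15]))]}
# which _VerifyNodeOS below compares against the reference node.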
1777 def _VerifyNodeOS(self, ninfo, nimg, base):
1778 """Verifies the node OS list.
1780 @type ninfo: L{objects.Node}
1781 @param ninfo: the node to check
1782 @param nimg: the node image object
1783 @param base: the 'template' node we match against (e.g. from the master)
1787 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1789 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1791 for os_name, os_data in nimg.oslist.items():
1792 assert os_data, "Empty OS status for OS %s?!" % os_name
1793 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1794 _ErrorIf(not f_status, self.ENODEOS, node,
1795 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1796 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1797 "OS '%s' has multiple entries (first one shadows the rest): %s",
1798 os_name, utils.CommaJoin([v[0] for v in os_data]))
1799 # this will be caught in the backend too
1800 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1801 and not f_var, self.ENODEOS, node,
1802 "OS %s with API at least %d does not declare any variant",
1803 os_name, constants.OS_API_V15)
1804 # comparisons with the 'base' image
1805 test = os_name not in base.oslist
1806 _ErrorIf(test, self.ENODEOS, node,
1807 "Extra OS %s not present on reference node (%s)",
1811 assert base.oslist[os_name], "Base node has empty OS status?"
1812 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1814 # base OS is invalid, skipping
1816 for kind, a, b in [("API version", f_api, b_api),
1817 ("variants list", f_var, b_var),
1818 ("parameters", f_param, b_param)]:
1819 _ErrorIf(a != b, self.ENODEOS, node,
1820 "OS %s %s differs from reference node %s: %s vs. %s",
1821 kind, os_name, base.name,
1822 utils.CommaJoin(a), utils.CommaJoin(b))
1824 # check any missing OSes
1825 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1826 _ErrorIf(missing, self.ENODEOS, node,
1827 "OSes present on reference node %s but missing on this node: %s",
1828 base.name, utils.CommaJoin(missing))
1830 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1831 """Verifies and updates the node volume data.
1833 This function will update a L{NodeImage}'s internal structures
1834 with data from the remote call.
1836 @type ninfo: L{objects.Node}
1837 @param ninfo: the node to check
1838 @param nresult: the remote results for the node
1839 @param nimg: the node image object
1840 @param vg_name: the configured VG name
1844 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1846 nimg.lvm_fail = True
1847 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1850 elif isinstance(lvdata, basestring):
1851 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1852 utils.SafeEncode(lvdata))
1853 elif not isinstance(lvdata, dict):
1854 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1856 nimg.volumes = lvdata
1857 nimg.lvm_fail = False
1859 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1860 """Verifies and updates the node instance list.
1862 If the listing was successful, then updates this node's instance
1863 list. Otherwise, it marks the RPC call as failed for the instance
1866 @type ninfo: L{objects.Node}
1867 @param ninfo: the node to check
1868 @param nresult: the remote results for the node
1869 @param nimg: the node image object
1872 idata = nresult.get(constants.NV_INSTANCELIST, None)
1873 test = not isinstance(idata, list)
1874 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1875 " (instancelist): %s", utils.SafeEncode(str(idata)))
1877 nimg.hyp_fail = True
1879 nimg.instances = idata
1881 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1882 """Verifies and computes a node information map
1884 @type ninfo: L{objects.Node}
1885 @param ninfo: the node to check
1886 @param nresult: the remote results for the node
1887 @param nimg: the node image object
1888 @param vg_name: the configured VG name
1892 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1894 # try to read free memory (from the hypervisor)
1895 hv_info = nresult.get(constants.NV_HVINFO, None)
1896 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1897 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1900 nimg.mfree = int(hv_info["memory_free"])
1901 except (ValueError, TypeError):
1902 _ErrorIf(True, self.ENODERPC, node,
1903 "node returned invalid nodeinfo, check hypervisor")
1905 # FIXME: devise a free space model for file based instances as well
1906 if vg_name is not None:
1907 test = (constants.NV_VGLIST not in nresult or
1908 vg_name not in nresult[constants.NV_VGLIST])
1909 _ErrorIf(test, self.ENODELVM, node,
1910 "node didn't return data for the volume group '%s'"
1911 " - it is either missing or broken", vg_name)
1914 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1915 except (ValueError, TypeError):
1916 _ErrorIf(True, self.ENODERPC, node,
1917 "node returned invalid LVM info, check LVM status")
1919 def BuildHooksEnv(self):
1922 Cluster-Verify hooks run only in the post phase; their failure is
1923 logged in the verify output and makes the verification fail.
1926 all_nodes = self.cfg.GetNodeList()
1928 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1930 for node in self.cfg.GetAllNodesInfo().values():
1931 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1933 return env, [], all_nodes
1935 def Exec(self, feedback_fn):
1936 """Verify integrity of cluster, performing various test on nodes.
1940 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1941 verbose = self.op.verbose
1942 self._feedback_fn = feedback_fn
1943 feedback_fn("* Verifying global settings")
1944 for msg in self.cfg.VerifyConfig():
1945 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1947 # Check the cluster certificates
1948 for cert_filename in constants.ALL_CERT_FILES:
1949 (errcode, msg) = _VerifyCertificate(cert_filename)
1950 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1952 vg_name = self.cfg.GetVGName()
1953 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1954 cluster = self.cfg.GetClusterInfo()
1955 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1956 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1957 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1958 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1959 for iname in instancelist)
1960 i_non_redundant = [] # Non redundant instances
1961 i_non_a_balanced = [] # Non auto-balanced instances
1962 n_offline = 0 # Count of offline nodes
1963 n_drained = 0 # Count of nodes being drained
1964 node_vol_should = {}
1966 # FIXME: verify OS list
1967 # do local checksums
1968 master_files = [constants.CLUSTER_CONF_FILE]
1969 master_node = self.master_node = self.cfg.GetMasterNode()
1970 master_ip = self.cfg.GetMasterIP()
1972 file_names = ssconf.SimpleStore().GetFileList()
1973 file_names.extend(constants.ALL_CERT_FILES)
1974 file_names.extend(master_files)
1975 if cluster.modify_etc_hosts:
1976 file_names.append(constants.ETC_HOSTS)
1978 local_checksums = utils.FingerprintFiles(file_names)
1980 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1981 node_verify_param = {
1982 constants.NV_FILELIST: file_names,
1983 constants.NV_NODELIST: [node.name for node in nodeinfo
1984 if not node.offline],
1985 constants.NV_HYPERVISOR: hypervisors,
1986 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1987 node.secondary_ip) for node in nodeinfo
1988 if not node.offline],
1989 constants.NV_INSTANCELIST: hypervisors,
1990 constants.NV_VERSION: None,
1991 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1992 constants.NV_NODESETUP: None,
1993 constants.NV_TIME: None,
1994 constants.NV_MASTERIP: (master_node, master_ip),
1995 constants.NV_OSLIST: None,
1998 if vg_name is not None:
1999 node_verify_param[constants.NV_VGLIST] = None
2000 node_verify_param[constants.NV_LVLIST] = vg_name
2001 node_verify_param[constants.NV_PVLIST] = [vg_name]
2002 node_verify_param[constants.NV_DRBDLIST] = None
2004 # Build our expected cluster state
2005 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2007 for node in nodeinfo)
2009 for instance in instancelist:
2010 inst_config = instanceinfo[instance]
2012 for nname in inst_config.all_nodes:
2013 if nname not in node_image:
2015 gnode = self.NodeImage(name=nname)
2017 node_image[nname] = gnode
2019 inst_config.MapLVsByNode(node_vol_should)
2021 pnode = inst_config.primary_node
2022 node_image[pnode].pinst.append(instance)
2024 for snode in inst_config.secondary_nodes:
2025 nimg = node_image[snode]
2026 nimg.sinst.append(instance)
2027 if pnode not in nimg.sbp:
2028 nimg.sbp[pnode] = []
2029 nimg.sbp[pnode].append(instance)
2031 # At this point, we have the in-memory data structures complete,
2032 # except for the runtime information, which we'll gather next
2034 # Due to the way our RPC system works, exact response times cannot be
2035 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2036 # time before and after executing the request, we can at least have a time window.
2038 nvinfo_starttime = time.time()
2039 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2040 self.cfg.GetClusterName())
2041 nvinfo_endtime = time.time()
2043 all_drbd_map = self.cfg.ComputeDRBDMap()
2045 feedback_fn("* Verifying node status")
2049 for node_i in nodeinfo:
2051 nimg = node_image[node]
2055 feedback_fn("* Skipping offline node %s" % (node,))
2059 if node == master_node:
2061 elif node_i.master_candidate:
2062 ntype = "master candidate"
2063 elif node_i.drained:
2069 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2071 msg = all_nvinfo[node].fail_msg
2072 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2074 nimg.rpc_fail = True
2077 nresult = all_nvinfo[node].payload
2079 nimg.call_ok = self._VerifyNode(node_i, nresult)
2080 self._VerifyNodeNetwork(node_i, nresult)
2081 self._VerifyNodeLVM(node_i, nresult, vg_name)
2082 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2084 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
2085 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2087 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2088 self._UpdateNodeInstances(node_i, nresult, nimg)
2089 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2090 self._UpdateNodeOS(node_i, nresult, nimg)
2091 if not nimg.os_fail:
2092 if refos_img is None:
2094 self._VerifyNodeOS(node_i, nimg, refos_img)
2096 feedback_fn("* Verifying instance status")
2097 for instance in instancelist:
2099 feedback_fn("* Verifying instance %s" % instance)
2100 inst_config = instanceinfo[instance]
2101 self._VerifyInstance(instance, inst_config, node_image)
2102 inst_nodes_offline = []
2104 pnode = inst_config.primary_node
2105 pnode_img = node_image[pnode]
2106 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2107 self.ENODERPC, pnode, "instance %s, connection to"
2108 " primary node failed", instance)
2110 if pnode_img.offline:
2111 inst_nodes_offline.append(pnode)
2113 # If the instance is non-redundant we cannot survive losing its primary
2114 # node, so we are not N+1 compliant. On the other hand we have no disk
2115 # templates with more than one secondary, so that situation is not well supported either.
2117 # FIXME: does not support file-backed instances
2118 if not inst_config.secondary_nodes:
2119 i_non_redundant.append(instance)
2120 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2121 instance, "instance has multiple secondary nodes: %s",
2122 utils.CommaJoin(inst_config.secondary_nodes),
2123 code=self.ETYPE_WARNING)
2125 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2126 i_non_a_balanced.append(instance)
2128 for snode in inst_config.secondary_nodes:
2129 s_img = node_image[snode]
2130 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2131 "instance %s, connection to secondary node failed", instance)
2134 inst_nodes_offline.append(snode)
2136 # warn that the instance lives on offline nodes
2137 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2138 "instance lives on offline node(s) %s",
2139 utils.CommaJoin(inst_nodes_offline))
2140 # ... or ghost nodes
2141 for node in inst_config.all_nodes:
2142 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2143 "instance lives on ghost node %s", node)
2145 feedback_fn("* Verifying orphan volumes")
2146 self._VerifyOrphanVolumes(node_vol_should, node_image)
2148 feedback_fn("* Verifying orphan instances")
2149 self._VerifyOrphanInstances(instancelist, node_image)
2151 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2152 feedback_fn("* Verifying N+1 Memory redundancy")
2153 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2155 feedback_fn("* Other Notes")
2157 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2158 % len(i_non_redundant))
2160 if i_non_a_balanced:
2161 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2162 % len(i_non_a_balanced))
2165 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2168 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2172 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2173 """Analyze the post-hooks' result
2175 This method analyses the hook result, handles it, and sends some
2176 nicely-formatted feedback back to the user.
2178 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2179 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2180 @param hooks_results: the results of the multi-node hooks rpc call
2181 @param feedback_fn: function used to send feedback back to the caller
2182 @param lu_result: previous Exec result
2183 @return: the new Exec result, based on the previous result
2187 # We only really run POST phase hooks, and are only interested in their results
2189 if phase == constants.HOOKS_PHASE_POST:
2190 # Used to change hooks' output to proper indentation
2191 indent_re = re.compile('^', re.M)
2192 feedback_fn("* Hooks Results")
2193 assert hooks_results, "invalid result from hooks"
2195 for node_name in hooks_results:
2196 res = hooks_results[node_name]
2198 test = msg and not res.offline
2199 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2200 "Communication failure in hooks execution: %s", msg)
2201 if res.offline or msg:
2202 # No need to investigate payload if node is offline or gave an error.
2203 # manually override lu_result here, as _ErrorIf only
2204 # overrides self.bad
2207 for script, hkr, output in res.payload:
2208 test = hkr == constants.HKR_FAIL
2209 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2210 "Script %s failed, output:", script)
2212 output = indent_re.sub(' ', output)
2213 feedback_fn("%s" % output)
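# Illustrative sketch only (not called by any LU): the re-indentation trick
# used by HooksCallBack above - compiling "^" with re.M anchors the
# substitution at the start of every line, so a single sub() call prefixes
# the whole multi-line hook output before it is passed to feedback_fn.
def _ExampleIndentHookOutput(output, prefix="  "):
  """Return OUTPUT with every line prefixed by PREFIX."""
  import re  # the module already imports re; repeated so the sketch stands alone
  indent_re = re.compile("^", re.M)
  return indent_re.sub(prefix, output)

# e.g. _ExampleIndentHookOutput("ok\nfailed") == "  ok\n  failed"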
2219 class LUVerifyDisks(NoHooksLU):
2220 """Verifies the cluster disks status.
2226 def ExpandNames(self):
2227 self.needed_locks = {
2228 locking.LEVEL_NODE: locking.ALL_SET,
2229 locking.LEVEL_INSTANCE: locking.ALL_SET,
2231 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2233 def Exec(self, feedback_fn):
2234 """Verify integrity of cluster disks.
2236 @rtype: tuple of three items
2237 @return: a tuple of (dict of node-to-node_error, list of instances
2238 which need activate-disks, dict of instance: (node, volume) for missing volumes)
2242 result = res_nodes, res_instances, res_missing = {}, [], {}
2244 vg_name = self.cfg.GetVGName()
2245 nodes = utils.NiceSort(self.cfg.GetNodeList())
2246 instances = [self.cfg.GetInstanceInfo(name)
2247 for name in self.cfg.GetInstanceList()]
2250 for inst in instances:
2252 if (not inst.admin_up or
2253 inst.disk_template not in constants.DTS_NET_MIRROR):
2255 inst.MapLVsByNode(inst_lvs)
2256 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2257 for node, vol_list in inst_lvs.iteritems():
2258 for vol in vol_list:
2259 nv_dict[(node, vol)] = inst
2264 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2268 node_res = node_lvs[node]
2269 if node_res.offline:
2271 msg = node_res.fail_msg
2273 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2274 res_nodes[node] = msg
2277 lvs = node_res.payload
2278 for lv_name, (_, _, lv_online) in lvs.items():
2279 inst = nv_dict.pop((node, lv_name), None)
2280 if (not lv_online and inst is not None
2281 and inst.name not in res_instances):
2282 res_instances.append(inst.name)
2284 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2286 for key, inst in nv_dict.iteritems():
2287 if inst.name not in res_missing:
2288 res_missing[inst.name] = []
2289 res_missing[inst.name].append(key)
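# Illustrative sketch only: the map inversion described in the comment in
# LUVerifyDisks.Exec above, turning per-instance LV maps of the form
# {node: [vol, ...]} into a single {(node, vol): instance_name} lookup table
# (the nv_dict used to spot offline and missing LVs).
def _ExampleInvertLvMap(inst_lvs_by_name):
  """@param inst_lvs_by_name: dict of instance name -> {node: [vol, ...]}"""
  nv_dict = {}
  for iname, node_map in inst_lvs_by_name.items():
    for node, vol_list in node_map.items():
      for vol in vol_list:
        nv_dict[(node, vol)] = iname
  return nv_dict

# e.g. _ExampleInvertLvMap({"inst1": {"node1": ["xenvg/disk0"]}})
#      == {("node1", "xenvg/disk0"): "inst1"}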
2294 class LURepairDiskSizes(NoHooksLU):
2295 """Verifies the cluster disks sizes.
2298 _OP_REQP = [("instances", _TListOf(_TNonEmptyString))]
2301 def ExpandNames(self):
2302 if self.op.instances:
2303 self.wanted_names = []
2304 for name in self.op.instances:
2305 full_name = _ExpandInstanceName(self.cfg, name)
2306 self.wanted_names.append(full_name)
2307 self.needed_locks = {
2308 locking.LEVEL_NODE: [],
2309 locking.LEVEL_INSTANCE: self.wanted_names,
2311 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2313 self.wanted_names = None
2314 self.needed_locks = {
2315 locking.LEVEL_NODE: locking.ALL_SET,
2316 locking.LEVEL_INSTANCE: locking.ALL_SET,
2318 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2320 def DeclareLocks(self, level):
2321 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2322 self._LockInstancesNodes(primary_only=True)
2324 def CheckPrereq(self):
2325 """Check prerequisites.
2327 This only checks the optional instance list against the existing names.
2330 if self.wanted_names is None:
2331 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2333 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2334 in self.wanted_names]
2336 def _EnsureChildSizes(self, disk):
2337 """Ensure children of the disk have the needed disk size.
2339 This is valid mainly for DRBD8 and fixes an issue where the
2340 children have smaller disk size.
2342 @param disk: an L{ganeti.objects.Disk} object
2345 if disk.dev_type == constants.LD_DRBD8:
2346 assert disk.children, "Empty children for DRBD8?"
2347 fchild = disk.children[0]
2348 mismatch = fchild.size < disk.size
2350 self.LogInfo("Child disk has size %d, parent %d, fixing",
2351 fchild.size, disk.size)
2352 fchild.size = disk.size
2354 # and we recurse on this child only, not on the metadev
2355 return self._EnsureChildSizes(fchild) or mismatch
2359 def Exec(self, feedback_fn):
2360 """Verify the size of cluster disks.
2363 # TODO: check child disks too
2364 # TODO: check differences in size between primary/secondary nodes
2366 for instance in self.wanted_instances:
2367 pnode = instance.primary_node
2368 if pnode not in per_node_disks:
2369 per_node_disks[pnode] = []
2370 for idx, disk in enumerate(instance.disks):
2371 per_node_disks[pnode].append((instance, idx, disk))
2374 for node, dskl in per_node_disks.items():
2375 newl = [v[2].Copy() for v in dskl]
2377 self.cfg.SetDiskID(dsk, node)
2378 result = self.rpc.call_blockdev_getsizes(node, newl)
2380 self.LogWarning("Failure in blockdev_getsizes call to node"
2381 " %s, ignoring", node)
2383 if len(result.data) != len(dskl):
2384 self.LogWarning("Invalid result from node %s, ignoring node results",
2387 for ((instance, idx, disk), size) in zip(dskl, result.data):
2389 self.LogWarning("Disk %d of instance %s did not return size"
2390 " information, ignoring", idx, instance.name)
2392 if not isinstance(size, (int, long)):
2393 self.LogWarning("Disk %d of instance %s did not return valid"
2394 " size information, ignoring", idx, instance.name)
2397 if size != disk.size:
2398 self.LogInfo("Disk %d of instance %s has mismatched size,"
2399 " correcting: recorded %d, actual %d", idx,
2400 instance.name, disk.size, size)
2402 self.cfg.Update(instance, feedback_fn)
2403 changed.append((instance.name, idx, size))
2404 if self._EnsureChildSizes(disk):
2405 self.cfg.Update(instance, feedback_fn)
2406 changed.append((instance.name, idx, disk.size))
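# Illustrative sketch only, using a stand-in class instead of
# L{ganeti.objects.Disk}: the child-resize rule applied by _EnsureChildSizes
# above - the first child (the data device of a DRBD8 disk) is grown to the
# parent's size, recursing on that child only, and the return value says
# whether anything had to be fixed.
class _ExampleDisk(object):
  def __init__(self, size, children=None):
    self.size = size
    self.children = children or []

def _ExampleEnsureChildSize(disk):
  """Grow the first child to the parent's size; return True if changed."""
  if not disk.children:
    return False
  fchild = disk.children[0]
  mismatch = fchild.size < disk.size
  if mismatch:
    fchild.size = disk.size
  # recurse on the data child only, mirroring _EnsureChildSizes
  return _ExampleEnsureChildSize(fchild) or mismatch

# e.g. a 1024 MiB parent with a 1000 MiB child:
#   _ExampleEnsureChildSize(_ExampleDisk(1024, [_ExampleDisk(1000)])) -> True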
2410 class LURenameCluster(LogicalUnit):
2411 """Rename the cluster.
2414 HPATH = "cluster-rename"
2415 HTYPE = constants.HTYPE_CLUSTER
2416 _OP_REQP = [("name", _TNonEmptyString)]
2418 def BuildHooksEnv(self):
2423 "OP_TARGET": self.cfg.GetClusterName(),
2424 "NEW_NAME": self.op.name,
2426 mn = self.cfg.GetMasterNode()
2427 all_nodes = self.cfg.GetNodeList()
2428 return env, [mn], all_nodes
2430 def CheckPrereq(self):
2431 """Verify that the passed name is a valid one.
2434 hostname = utils.GetHostInfo(self.op.name)
2436 new_name = hostname.name
2437 self.ip = new_ip = hostname.ip
2438 old_name = self.cfg.GetClusterName()
2439 old_ip = self.cfg.GetMasterIP()
2440 if new_name == old_name and new_ip == old_ip:
2441 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2442 " cluster has changed",
2444 if new_ip != old_ip:
2445 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2446 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2447 " reachable on the network. Aborting." %
2448 new_ip, errors.ECODE_NOTUNIQUE)
2450 self.op.name = new_name
2452 def Exec(self, feedback_fn):
2453 """Rename the cluster.
2456 clustername = self.op.name
2459 # shutdown the master IP
2460 master = self.cfg.GetMasterNode()
2461 result = self.rpc.call_node_stop_master(master, False)
2462 result.Raise("Could not disable the master role")
2465 cluster = self.cfg.GetClusterInfo()
2466 cluster.cluster_name = clustername
2467 cluster.master_ip = ip
2468 self.cfg.Update(cluster, feedback_fn)
2470 # update the known hosts file
2471 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2472 node_list = self.cfg.GetNodeList()
2474 node_list.remove(master)
2477 result = self.rpc.call_upload_file(node_list,
2478 constants.SSH_KNOWN_HOSTS_FILE)
2479 for to_node, to_result in result.iteritems():
2480 msg = to_result.fail_msg
2482 msg = ("Copy of file %s to node %s failed: %s" %
2483 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2484 self.proc.LogWarning(msg)
2487 result = self.rpc.call_node_start_master(master, False, False)
2488 msg = result.fail_msg
2490 self.LogWarning("Could not re-enable the master role on"
2491 " the master, please restart manually: %s", msg)
2494 def _RecursiveCheckIfLVMBased(disk):
2495 """Check if the given disk or its children are lvm-based.
2497 @type disk: L{objects.Disk}
2498 @param disk: the disk to check
2500 @return: boolean indicating whether a LD_LV dev_type was found or not
2504 for chdisk in disk.children:
2505 if _RecursiveCheckIfLVMBased(chdisk):
2507 return disk.dev_type == constants.LD_LV
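# Illustrative usage sketch only, with stand-in objects instead of
# L{objects.Disk}: _RecursiveCheckIfLVMBased answers "is there an LD_LV
# device anywhere in this disk tree?", which LUSetClusterParams.CheckPrereq
# below uses to refuse disabling the volume group while LVM-backed instance
# disks still exist.
class _ExampleDev(object):
  def __init__(self, dev_type, children=()):
    self.dev_type = dev_type
    self.children = list(children)

def _ExampleHasLvmLeaf(disk, lv_type="lvm"):
  """Standalone copy of the recursion, with a plain string instead of LD_LV."""
  for chdisk in disk.children:
    if _ExampleHasLvmLeaf(chdisk, lv_type):
      return True
  return disk.dev_type == lv_type

# e.g. a DRBD-like parent whose data child is an LV:
#   _ExampleHasLvmLeaf(_ExampleDev("drbd8", [_ExampleDev("lvm")])) -> True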
2510 class LUSetClusterParams(LogicalUnit):
2511 """Change the parameters of the cluster.
2514 HPATH = "cluster-modify"
2515 HTYPE = constants.HTYPE_CLUSTER
2517 ("hvparams", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2518 ("os_hvp", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2519 ("osparams", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2520 ("enabled_hypervisors",
2521 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2524 ("candidate_pool_size", None),
2527 ("remove_uids", None),
2533 def CheckArguments(self):
2537 if self.op.candidate_pool_size is not None:
2539 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2540 except (ValueError, TypeError), err:
2541 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2542 str(err), errors.ECODE_INVAL)
2543 if self.op.candidate_pool_size < 1:
2544 raise errors.OpPrereqError("At least one master candidate needed",
2547 _CheckBooleanOpField(self.op, "maintain_node_health")
2549 if self.op.uid_pool:
2550 uidpool.CheckUidPool(self.op.uid_pool)
2552 if self.op.add_uids:
2553 uidpool.CheckUidPool(self.op.add_uids)
2555 if self.op.remove_uids:
2556 uidpool.CheckUidPool(self.op.remove_uids)
2558 def ExpandNames(self):
2559 # FIXME: in the future maybe other cluster params won't require checking on
2560 # all nodes to be modified.
2561 self.needed_locks = {
2562 locking.LEVEL_NODE: locking.ALL_SET,
2564 self.share_locks[locking.LEVEL_NODE] = 1
2566 def BuildHooksEnv(self):
2571 "OP_TARGET": self.cfg.GetClusterName(),
2572 "NEW_VG_NAME": self.op.vg_name,
2574 mn = self.cfg.GetMasterNode()
2575 return env, [mn], [mn]
2577 def CheckPrereq(self):
2578 """Check prerequisites.
2580 This checks whether the given params don't conflict and
2581 if the given volume group is valid.
2584 if self.op.vg_name is not None and not self.op.vg_name:
2585 instances = self.cfg.GetAllInstancesInfo().values()
2586 for inst in instances:
2587 for disk in inst.disks:
2588 if _RecursiveCheckIfLVMBased(disk):
2589 raise errors.OpPrereqError("Cannot disable lvm storage while"
2590 " lvm-based instances exist",
2593 node_list = self.acquired_locks[locking.LEVEL_NODE]
2595 # if vg_name not None, checks given volume group on all nodes
2597 vglist = self.rpc.call_vg_list(node_list)
2598 for node in node_list:
2599 msg = vglist[node].fail_msg
2601 # ignoring down node
2602 self.LogWarning("Error while gathering data on node %s"
2603 " (ignoring node): %s", node, msg)
2605 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2607 constants.MIN_VG_SIZE)
2609 raise errors.OpPrereqError("Error on node '%s': %s" %
2610 (node, vgstatus), errors.ECODE_ENVIRON)
2612 self.cluster = cluster = self.cfg.GetClusterInfo()
2613 # validate params changes
2614 if self.op.beparams:
2615 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2616 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2618 if self.op.nicparams:
2619 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2620 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2621 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2624 # check all instances for consistency
2625 for instance in self.cfg.GetAllInstancesInfo().values():
2626 for nic_idx, nic in enumerate(instance.nics):
2627 params_copy = copy.deepcopy(nic.nicparams)
2628 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2630 # check parameter syntax
2632 objects.NIC.CheckParameterSyntax(params_filled)
2633 except errors.ConfigurationError, err:
2634 nic_errors.append("Instance %s, nic/%d: %s" %
2635 (instance.name, nic_idx, err))
2637 # if we're moving instances to routed, check that they have an ip
2638 target_mode = params_filled[constants.NIC_MODE]
2639 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2640 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2641 (instance.name, nic_idx))
2643 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2644 "\n".join(nic_errors))
2646 # hypervisor list/parameters
2647 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2648 if self.op.hvparams:
2649 for hv_name, hv_dict in self.op.hvparams.items():
2650 if hv_name not in self.new_hvparams:
2651 self.new_hvparams[hv_name] = hv_dict
2653 self.new_hvparams[hv_name].update(hv_dict)
2655 # os hypervisor parameters
2656 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2658 for os_name, hvs in self.op.os_hvp.items():
2659 if os_name not in self.new_os_hvp:
2660 self.new_os_hvp[os_name] = hvs
2662 for hv_name, hv_dict in hvs.items():
2663 if hv_name not in self.new_os_hvp[os_name]:
2664 self.new_os_hvp[os_name][hv_name] = hv_dict
2666 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2669 self.new_osp = objects.FillDict(cluster.osparams, {})
2670 if self.op.osparams:
2671 for os_name, osp in self.op.osparams.items():
2672 if os_name not in self.new_osp:
2673 self.new_osp[os_name] = {}
2675 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2678 if not self.new_osp[os_name]:
2679 # we removed all parameters
2680 del self.new_osp[os_name]
2682 # check the parameter validity (remote check)
2683 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2684 os_name, self.new_osp[os_name])
2686 # changes to the hypervisor list
2687 if self.op.enabled_hypervisors is not None:
2688 self.hv_list = self.op.enabled_hypervisors
2689 for hv in self.hv_list:
2690 # if the hypervisor doesn't already exist in the cluster
2691 # hvparams, we initialize it to empty, and then (in both
2692 # cases) we make sure to fill the defaults, as we might not
2693 # have a complete defaults list if the hypervisor wasn't enabled before
2695 if hv not in new_hvp:
2697 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2698 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2700 self.hv_list = cluster.enabled_hypervisors
2702 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2703 # either the enabled list has changed, or the parameters have, validate
2704 for hv_name, hv_params in self.new_hvparams.items():
2705 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2706 (self.op.enabled_hypervisors and
2707 hv_name in self.op.enabled_hypervisors)):
2708 # either this is a new hypervisor, or its parameters have changed
2709 hv_class = hypervisor.GetHypervisor(hv_name)
2710 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2711 hv_class.CheckParameterSyntax(hv_params)
2712 _CheckHVParams(self, node_list, hv_name, hv_params)
2715 # no need to check any newly-enabled hypervisors, since the
2716 # defaults have already been checked in the above code-block
2717 for os_name, os_hvp in self.new_os_hvp.items():
2718 for hv_name, hv_params in os_hvp.items():
2719 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2720 # we need to fill in the new os_hvp on top of the actual hv_p
2721 cluster_defaults = self.new_hvparams.get(hv_name, {})
2722 new_osp = objects.FillDict(cluster_defaults, hv_params)
2723 hv_class = hypervisor.GetHypervisor(hv_name)
2724 hv_class.CheckParameterSyntax(new_osp)
2725 _CheckHVParams(self, node_list, hv_name, new_osp)
2728 def Exec(self, feedback_fn):
2729 """Change the parameters of the cluster.
2732 if self.op.vg_name is not None:
2733 new_volume = self.op.vg_name
2736 if new_volume != self.cfg.GetVGName():
2737 self.cfg.SetVGName(new_volume)
2739 feedback_fn("Cluster LVM configuration already in desired"
2740 " state, not changing")
2741 if self.op.hvparams:
2742 self.cluster.hvparams = self.new_hvparams
2744 self.cluster.os_hvp = self.new_os_hvp
2745 if self.op.enabled_hypervisors is not None:
2746 self.cluster.hvparams = self.new_hvparams
2747 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2748 if self.op.beparams:
2749 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2750 if self.op.nicparams:
2751 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2752 if self.op.osparams:
2753 self.cluster.osparams = self.new_osp
2755 if self.op.candidate_pool_size is not None:
2756 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2757 # we need to update the pool size here, otherwise the save will fail
2758 _AdjustCandidatePool(self, [])
2760 if self.op.maintain_node_health is not None:
2761 self.cluster.maintain_node_health = self.op.maintain_node_health
2763 if self.op.add_uids is not None:
2764 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2766 if self.op.remove_uids is not None:
2767 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2769 if self.op.uid_pool is not None:
2770 self.cluster.uid_pool = self.op.uid_pool
2772 self.cfg.Update(self.cluster, feedback_fn)
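# Illustrative sketch only (not objects.FillDict itself): the layered
# parameter filling used throughout LUSetClusterParams above - a copy of the
# defaults dict is updated with the more specific overrides, e.g. cluster
# hvparams overlaid by per-OS hvparams, or NIC defaults overlaid by a NIC's
# own values.
def _ExampleFillDict(defaults, overrides):
  """Return a new dict: DEFAULTS copied, then updated from OVERRIDES."""
  filled = dict(defaults)
  filled.update(overrides)
  return filled

# e.g. _ExampleFillDict({"acpi": True, "pae": True}, {"acpi": False})
#      == {"acpi": False, "pae": True}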
2775 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2776 """Distribute additional files which are part of the cluster configuration.
2778 ConfigWriter takes care of distributing the config and ssconf files, but
2779 there are more files which should be distributed to all nodes. This function
2780 makes sure those are copied.
2782 @param lu: calling logical unit
2783 @param additional_nodes: list of nodes not in the config to distribute to
2786 # 1. Gather target nodes
2787 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2788 dist_nodes = lu.cfg.GetOnlineNodeList()
2789 if additional_nodes is not None:
2790 dist_nodes.extend(additional_nodes)
2791 if myself.name in dist_nodes:
2792 dist_nodes.remove(myself.name)
2794 # 2. Gather files to distribute
2795 dist_files = set([constants.ETC_HOSTS,
2796 constants.SSH_KNOWN_HOSTS_FILE,
2797 constants.RAPI_CERT_FILE,
2798 constants.RAPI_USERS_FILE,
2799 constants.CONFD_HMAC_KEY,
2800 constants.CLUSTER_DOMAIN_SECRET_FILE,
2803 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2804 for hv_name in enabled_hypervisors:
2805 hv_class = hypervisor.GetHypervisor(hv_name)
2806 dist_files.update(hv_class.GetAncillaryFiles())
2808 # 3. Perform the files upload
2809 for fname in dist_files:
2810 if os.path.exists(fname):
2811 result = lu.rpc.call_upload_file(dist_nodes, fname)
2812 for to_node, to_result in result.items():
2813 msg = to_result.fail_msg
2815 msg = ("Copy of file %s to node %s failed: %s" %
2816 (fname, to_node, msg))
2817 lu.proc.LogWarning(msg)
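# Illustrative sketch only (plain data, no RPC): the selection logic of
# _RedistributeAncillaryFiles above - the master drops itself from the
# distribution list, optional extra nodes are appended, and only files that
# actually exist on the local filesystem are offered for upload.
def _ExampleSelectDistribution(master_name, online_nodes, candidate_files,
                               additional_nodes=None):
  """Return (nodes_to_push_to, files_that_exist_locally)."""
  import os  # already imported at module level; repeated for standalone use
  dist_nodes = list(online_nodes)
  if additional_nodes:
    dist_nodes.extend(additional_nodes)
  if master_name in dist_nodes:
    dist_nodes.remove(master_name)
  existing = [fname for fname in candidate_files if os.path.exists(fname)]
  return dist_nodes, existing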
2820 class LURedistributeConfig(NoHooksLU):
2821 """Force the redistribution of cluster configuration.
2823 This is a very simple LU.
2829 def ExpandNames(self):
2830 self.needed_locks = {
2831 locking.LEVEL_NODE: locking.ALL_SET,
2833 self.share_locks[locking.LEVEL_NODE] = 1
2835 def Exec(self, feedback_fn):
2836 """Redistribute the configuration.
2839 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2840 _RedistributeAncillaryFiles(self)
2843 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2844 """Sleep and poll for an instance's disk to sync.
2847 if not instance.disks or disks is not None and not disks:
2850 disks = _ExpandCheckDisks(instance, disks)
2853 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2855 node = instance.primary_node
2858 lu.cfg.SetDiskID(dev, node)
2860 # TODO: Convert to utils.Retry
2863 degr_retries = 10 # in seconds, as we sleep 1 second each time
2867 cumul_degraded = False
2868 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2869 msg = rstats.fail_msg
2871 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2874 raise errors.RemoteError("Can't contact node %s for mirror data,"
2875 " aborting." % node)
2878 rstats = rstats.payload
2880 for i, mstat in enumerate(rstats):
2882 lu.LogWarning("Can't compute data for node %s/%s",
2883 node, disks[i].iv_name)
2886 cumul_degraded = (cumul_degraded or
2887 (mstat.is_degraded and mstat.sync_percent is None))
2888 if mstat.sync_percent is not None:
2890 if mstat.estimated_time is not None:
2891 rem_time = ("%s remaining (estimated)" %
2892 utils.FormatSeconds(mstat.estimated_time))
2893 max_time = mstat.estimated_time
2895 rem_time = "no time estimate"
2896 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2897 (disks[i].iv_name, mstat.sync_percent, rem_time))
2899 # if we're done but degraded, let's do a few small retries, to
2900 # make sure we see a stable and not transient situation; therefore
2901 # we force restart of the loop
2902 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2903 logging.info("Degraded disks found, %d retries left", degr_retries)
2911 time.sleep(min(60, max_time))
2914 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2915 return not cumul_degraded
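# Illustrative sketch only (no RPC; sleeping is delegated to a parameter so
# the loop can be exercised without waiting): the retry shape of _WaitForSync
# above - when the mirrors look finished but are still degraded, a limited
# number of short extra polls are made, otherwise the wait between polls is
# capped at 60 seconds.
def _ExampleWaitLoop(poll_fn, sleep_fn, degr_retries=10):
  """POLL_FN returns (done, degraded, est_time); returns True if clean."""
  while True:
    done, degraded, est_time = poll_fn()
    if done and degraded and degr_retries > 0:
      degr_retries -= 1
      sleep_fn(1)
      continue
    if done:
      return not degraded
    sleep_fn(min(60, est_time))

# e.g. states = iter([(False, True, 5), (True, True, 0), (True, False, 0)])
#      _ExampleWaitLoop(lambda: next(states), lambda _secs: None) -> True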
2918 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2919 """Check that mirrors are not degraded.
2921 The ldisk parameter, if True, will change the test from the
2922 is_degraded attribute (which represents overall non-ok status for
2923 the device(s)) to the ldisk (representing the local storage status).
2926 lu.cfg.SetDiskID(dev, node)
2930 if on_primary or dev.AssembleOnSecondary():
2931 rstats = lu.rpc.call_blockdev_find(node, dev)
2932 msg = rstats.fail_msg
2934 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2936 elif not rstats.payload:
2937 lu.LogWarning("Can't find disk on node %s", node)
2941 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2943 result = result and not rstats.payload.is_degraded
2946 for child in dev.children:
2947 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2952 class LUDiagnoseOS(NoHooksLU):
2953 """Logical unit for OS diagnose/query.
2957 ("output_fields", _TListOf(_TNonEmptyString)),
2958 ("names", _TListOf(_TNonEmptyString)),
2961 _FIELDS_STATIC = utils.FieldSet()
2962 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
2963 "parameters", "api_versions")
2965 def CheckArguments(self):
2967 raise errors.OpPrereqError("Selective OS query not supported",
2970 _CheckOutputFields(static=self._FIELDS_STATIC,
2971 dynamic=self._FIELDS_DYNAMIC,
2972 selected=self.op.output_fields)
2974 def ExpandNames(self):
2975 # Lock all nodes, in shared mode
2976 # Temporary removal of locks, should be reverted later
2977 # TODO: reintroduce locks when they are lighter-weight
2978 self.needed_locks = {}
2979 #self.share_locks[locking.LEVEL_NODE] = 1
2980 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2983 def _DiagnoseByOS(rlist):
2984 """Remaps a per-node return list into an a per-os per-node dictionary
2986 @param rlist: a map with node names as keys and OS objects as values
2989 @return: a dictionary with osnames as keys and as value another
2990 map, with nodes as keys and tuples of (path, status, diagnose,
2991 variants, parameters, api_versions) as values, eg::
2993 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
2994 (/srv/..., False, "invalid api")],
2995 "node2": [(/srv/..., True, "", [], [])]}
3000 # we build here the list of nodes that didn't fail the RPC (at RPC
3001 # level), so that nodes with a non-responding node daemon don't
3002 # make all OSes invalid
3003 good_nodes = [node_name for node_name in rlist
3004 if not rlist[node_name].fail_msg]
3005 for node_name, nr in rlist.items():
3006 if nr.fail_msg or not nr.payload:
3008 for (name, path, status, diagnose, variants,
3009 params, api_versions) in nr.payload:
3010 if name not in all_os:
3011 # build a list of nodes for this os containing empty lists
3012 # for each node in node_list
3014 for nname in good_nodes:
3015 all_os[name][nname] = []
3016 # convert params from [name, help] to (name, help)
3017 params = [tuple(v) for v in params]
3018 all_os[name][node_name].append((path, status, diagnose,
3019 variants, params, api_versions))
3022 def Exec(self, feedback_fn):
3023 """Compute the list of OSes.
3026 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3027 node_data = self.rpc.call_os_diagnose(valid_nodes)
3028 pol = self._DiagnoseByOS(node_data)
3031 for os_name, os_data in pol.items():
3034 (variants, params, api_versions) = null_state = (set(), set(), set())
3035 for idx, osl in enumerate(os_data.values()):
3036 valid = bool(valid and osl and osl[0][1])
3038 (variants, params, api_versions) = null_state
3040 node_variants, node_params, node_api = osl[0][3:6]
3041 if idx == 0: # first entry
3042 variants = set(node_variants)
3043 params = set(node_params)
3044 api_versions = set(node_api)
3045 else: # keep consistency
3046 variants.intersection_update(node_variants)
3047 params.intersection_update(node_params)
3048 api_versions.intersection_update(node_api)
3050 for field in self.op.output_fields:
3053 elif field == "valid":
3055 elif field == "node_status":
3056 # this is just a copy of the dict
3058 for node_name, nos_list in os_data.items():
3059 val[node_name] = nos_list
3060 elif field == "variants":
3061 val = list(variants)
3062 elif field == "parameters":
3064 elif field == "api_versions":
3065 val = list(api_versions)
3067 raise errors.ParameterError(field)
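# Illustrative sketch only: the consistency rule used when merging per-node
# OS data above - the first node seeds the set of variants (likewise
# parameters and API versions), every further node can only narrow it, so
# the reported value is what *all* nodes agree on.
def _ExampleCommonVariants(per_node_variants):
  """@param per_node_variants: list of per-node variant lists"""
  common = set()
  for idx, node_variants in enumerate(per_node_variants):
    if idx == 0:
      common = set(node_variants)
    else:
      common.intersection_update(node_variants)
  return common

# e.g. _ExampleCommonVariants([["lenny", "squeeze"], ["squeeze", "wheezy"]])
#      == set(["squeeze"])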
3074 class LURemoveNode(LogicalUnit):
3075 """Logical unit for removing a node.
3078 HPATH = "node-remove"
3079 HTYPE = constants.HTYPE_NODE
3080 _OP_REQP = [("node_name", _TNonEmptyString)]
3082 def BuildHooksEnv(self):
3085 This doesn't run on the target node in the pre phase as a failed
3086 node would then be impossible to remove.
3090 "OP_TARGET": self.op.node_name,
3091 "NODE_NAME": self.op.node_name,
3093 all_nodes = self.cfg.GetNodeList()
3095 all_nodes.remove(self.op.node_name)
3097 logging.warning("Node %s which is about to be removed not found"
3098 " in the all nodes list", self.op.node_name)
3099 return env, all_nodes, all_nodes
3101 def CheckPrereq(self):
3102 """Check prerequisites.
3105 - the node exists in the configuration
3106 - it does not have primary or secondary instances
3107 - it's not the master
3109 Any errors are signaled by raising errors.OpPrereqError.
3112 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3113 node = self.cfg.GetNodeInfo(self.op.node_name)
3114 assert node is not None
3116 instance_list = self.cfg.GetInstanceList()
3118 masternode = self.cfg.GetMasterNode()
3119 if node.name == masternode:
3120 raise errors.OpPrereqError("Node is the master node,"
3121 " you need to failover first.",
3124 for instance_name in instance_list:
3125 instance = self.cfg.GetInstanceInfo(instance_name)
3126 if node.name in instance.all_nodes:
3127 raise errors.OpPrereqError("Instance %s is still running on the node,"
3128 " please remove first." % instance_name,
3130 self.op.node_name = node.name
3133 def Exec(self, feedback_fn):
3134 """Removes the node from the cluster.
3138 logging.info("Stopping the node daemon and removing configs from node %s",
3141 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3143 # Promote nodes to master candidate as needed
3144 _AdjustCandidatePool(self, exceptions=[node.name])
3145 self.context.RemoveNode(node.name)
3147 # Run post hooks on the node before it's removed
3148 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3150 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3152 # pylint: disable-msg=W0702
3153 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3155 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3156 msg = result.fail_msg
3158 self.LogWarning("Errors encountered on the remote node while leaving"
3159 " the cluster: %s", msg)
3161 # Remove node from our /etc/hosts
3162 if self.cfg.GetClusterInfo().modify_etc_hosts:
3163 # FIXME: this should be done via an rpc call to node daemon
3164 utils.RemoveHostFromEtcHosts(node.name)
3165 _RedistributeAncillaryFiles(self)
3168 class LUQueryNodes(NoHooksLU):
3169 """Logical unit for querying nodes.
3172 # pylint: disable-msg=W0142
3174 ("output_fields", _TListOf(_TNonEmptyString)),
3175 ("names", _TListOf(_TNonEmptyString)),
3176 ("use_locking", _TBool),
3180 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3181 "master_candidate", "offline", "drained"]
3183 _FIELDS_DYNAMIC = utils.FieldSet(
3185 "mtotal", "mnode", "mfree",
3187 "ctotal", "cnodes", "csockets",
3190 _FIELDS_STATIC = utils.FieldSet(*[
3191 "pinst_cnt", "sinst_cnt",
3192 "pinst_list", "sinst_list",
3193 "pip", "sip", "tags",
3195 "role"] + _SIMPLE_FIELDS
3198 def CheckArguments(self):
3199 _CheckOutputFields(static=self._FIELDS_STATIC,
3200 dynamic=self._FIELDS_DYNAMIC,
3201 selected=self.op.output_fields)
3203 def ExpandNames(self):
3204 self.needed_locks = {}
3205 self.share_locks[locking.LEVEL_NODE] = 1
3208 self.wanted = _GetWantedNodes(self, self.op.names)
3210 self.wanted = locking.ALL_SET
3212 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3213 self.do_locking = self.do_node_query and self.op.use_locking
3215 # if we don't request only static fields, we need to lock the nodes
3216 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3218 def Exec(self, feedback_fn):
3219 """Computes the list of nodes and their attributes.
3222 all_info = self.cfg.GetAllNodesInfo()
3224 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3225 elif self.wanted != locking.ALL_SET:
3226 nodenames = self.wanted
3227 missing = set(nodenames).difference(all_info.keys())
3229 raise errors.OpExecError(
3230 "Some nodes were removed before retrieving their data: %s" % missing)
3232 nodenames = all_info.keys()
3234 nodenames = utils.NiceSort(nodenames)
3235 nodelist = [all_info[name] for name in nodenames]
3237 # begin data gathering
3239 if self.do_node_query:
3241 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3242 self.cfg.GetHypervisorType())
3243 for name in nodenames:
3244 nodeinfo = node_data[name]
3245 if not nodeinfo.fail_msg and nodeinfo.payload:
3246 nodeinfo = nodeinfo.payload
3247 fn = utils.TryConvert
3249 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3250 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3251 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3252 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3253 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3254 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3255 "bootid": nodeinfo.get('bootid', None),
3256 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3257 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3260 live_data[name] = {}
3262 live_data = dict.fromkeys(nodenames, {})
3264 node_to_primary = dict([(name, set()) for name in nodenames])
3265 node_to_secondary = dict([(name, set()) for name in nodenames])
3267 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3268 "sinst_cnt", "sinst_list"))
3269 if inst_fields & frozenset(self.op.output_fields):
3270 inst_data = self.cfg.GetAllInstancesInfo()
3272 for inst in inst_data.values():
3273 if inst.primary_node in node_to_primary:
3274 node_to_primary[inst.primary_node].add(inst.name)
3275 for secnode in inst.secondary_nodes:
3276 if secnode in node_to_secondary:
3277 node_to_secondary[secnode].add(inst.name)
3279 master_node = self.cfg.GetMasterNode()
3281 # end data gathering
3284 for node in nodelist:
3286 for field in self.op.output_fields:
3287 if field in self._SIMPLE_FIELDS:
3288 val = getattr(node, field)
3289 elif field == "pinst_list":
3290 val = list(node_to_primary[node.name])
3291 elif field == "sinst_list":
3292 val = list(node_to_secondary[node.name])
3293 elif field == "pinst_cnt":
3294 val = len(node_to_primary[node.name])
3295 elif field == "sinst_cnt":
3296 val = len(node_to_secondary[node.name])
3297 elif field == "pip":
3298 val = node.primary_ip
3299 elif field == "sip":
3300 val = node.secondary_ip
3301 elif field == "tags":
3302 val = list(node.GetTags())
3303 elif field == "master":
3304 val = node.name == master_node
3305 elif self._FIELDS_DYNAMIC.Matches(field):
3306 val = live_data[node.name].get(field, None)
3307 elif field == "role":
3308 if node.name == master_node:
3310 elif node.master_candidate:
3319 raise errors.ParameterError(field)
3320 node_output.append(val)
3321 output.append(node_output)
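# Illustrative sketch only (plain tuples instead of config objects): the
# reverse maps built by LUQueryNodes.Exec above, from node name to the sets
# of instances having it as primary respectively secondary node; they are
# only computed when the requested output fields actually need them.
def _ExampleNodeToInstances(node_names, instances):
  """@param instances: list of (name, primary_node, [secondary_nodes])"""
  node_to_primary = dict((name, set()) for name in node_names)
  node_to_secondary = dict((name, set()) for name in node_names)
  for iname, pnode, snodes in instances:
    if pnode in node_to_primary:
      node_to_primary[pnode].add(iname)
    for snode in snodes:
      if snode in node_to_secondary:
        node_to_secondary[snode].add(iname)
  return node_to_primary, node_to_secondary

# e.g. _ExampleNodeToInstances(["n1", "n2"], [("i1", "n1", ["n2"])])
#      == ({"n1": set(["i1"]), "n2": set()},
#          {"n1": set(), "n2": set(["i1"])})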
3326 class LUQueryNodeVolumes(NoHooksLU):
3327 """Logical unit for getting volumes on node(s).
3331 ("nodes", _TListOf(_TNonEmptyString)),
3332 ("output_fields", _TListOf(_TNonEmptyString)),
3335 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3336 _FIELDS_STATIC = utils.FieldSet("node")
3338 def CheckArguments(self):
3339 _CheckOutputFields(static=self._FIELDS_STATIC,
3340 dynamic=self._FIELDS_DYNAMIC,
3341 selected=self.op.output_fields)
3343 def ExpandNames(self):
3344 self.needed_locks = {}
3345 self.share_locks[locking.LEVEL_NODE] = 1
3346 if not self.op.nodes:
3347 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3349 self.needed_locks[locking.LEVEL_NODE] = \
3350 _GetWantedNodes(self, self.op.nodes)
3352 def Exec(self, feedback_fn):
3353 """Computes the list of nodes and their attributes.
3356 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3357 volumes = self.rpc.call_node_volumes(nodenames)
3359 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3360 in self.cfg.GetInstanceList()]
3362 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3365 for node in nodenames:
3366 nresult = volumes[node]
3369 msg = nresult.fail_msg
3371 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3374 node_vols = nresult.payload[:]
3375 node_vols.sort(key=lambda vol: vol['dev'])
3377 for vol in node_vols:
3379 for field in self.op.output_fields:
3382 elif field == "phys":
3386 elif field == "name":
3388 elif field == "size":
3389 val = int(float(vol['size']))
3390 elif field == "instance":
3392 if node not in lv_by_node[inst]:
3394 if vol['name'] in lv_by_node[inst][node]:
3400 raise errors.ParameterError(field)
3401 node_output.append(str(val))
3403 output.append(node_output)
3408 class LUQueryNodeStorage(NoHooksLU):
3409 """Logical unit for getting information on storage units on node(s).
3412 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3414 ("nodes", _TListOf(_TNonEmptyString)),
3415 ("storage_type", _CheckStorageType),
3416 ("output_fields", _TListOf(_TNonEmptyString)),
3418 _OP_DEFS = [("name", None)]
3421 def CheckArguments(self):
3422 _CheckOutputFields(static=self._FIELDS_STATIC,
3423 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3424 selected=self.op.output_fields)
3426 def ExpandNames(self):
3427 self.needed_locks = {}
3428 self.share_locks[locking.LEVEL_NODE] = 1
3431 self.needed_locks[locking.LEVEL_NODE] = \
3432 _GetWantedNodes(self, self.op.nodes)
3434 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3436 def Exec(self, feedback_fn):
3437 """Computes the list of nodes and their attributes.
3440 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3442 # Always get name to sort by
3443 if constants.SF_NAME in self.op.output_fields:
3444 fields = self.op.output_fields[:]
3446 fields = [constants.SF_NAME] + self.op.output_fields
3448 # Never ask for node or type as it's only known to the LU
3449 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3450 while extra in fields:
3451 fields.remove(extra)
3453 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3454 name_idx = field_idx[constants.SF_NAME]
3456 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3457 data = self.rpc.call_storage_list(self.nodes,
3458 self.op.storage_type, st_args,
3459 self.op.name, fields)
3463 for node in utils.NiceSort(self.nodes):
3464 nresult = data[node]
3468 msg = nresult.fail_msg
3470 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3473 rows = dict([(row[name_idx], row) for row in nresult.payload])
3475 for name in utils.NiceSort(rows.keys()):
3480 for field in self.op.output_fields:
3481 if field == constants.SF_NODE:
3483 elif field == constants.SF_TYPE:
3484 val = self.op.storage_type
3485 elif field in field_idx:
3486 val = row[field_idx[field]]
3488 raise errors.ParameterError(field)
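# Illustrative sketch only (plain strings instead of the SF_* constants):
# the field preparation done by LUQueryNodeStorage.Exec above - the name
# field is always fetched so the rows can be sorted, the node and type
# fields are stripped from the remote request because only the LU can fill
# them in, and a name->index map is built for assembling the output rows.
def _ExamplePrepareStorageFields(output_fields, name_field="name",
                                 local_only=("node", "type")):
  if name_field in output_fields:
    fields = list(output_fields)
  else:
    fields = [name_field] + list(output_fields)
  for extra in local_only:
    while extra in fields:
      fields.remove(extra)
  field_idx = dict((fname, idx) for (idx, fname) in enumerate(fields))
  return fields, field_idx

# e.g. _ExamplePrepareStorageFields(["node", "free", "name"])
#      == (["free", "name"], {"free": 0, "name": 1})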
3497 class LUModifyNodeStorage(NoHooksLU):
3498 """Logical unit for modifying a storage volume on a node.
3502 ("node_name", _TNonEmptyString),
3503 ("storage_type", _CheckStorageType),
3504 ("name", _TNonEmptyString),
3505 ("changes", _TDict),
3509 def CheckArguments(self):
3510 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3512 storage_type = self.op.storage_type
3515 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3517 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3518 " modified" % storage_type,
3521 diff = set(self.op.changes.keys()) - modifiable
3523 raise errors.OpPrereqError("The following fields can not be modified for"
3524 " storage units of type '%s': %r" %
3525 (storage_type, list(diff)),
3528 def ExpandNames(self):
3529 self.needed_locks = {
3530 locking.LEVEL_NODE: self.op.node_name,
3533 def Exec(self, feedback_fn):
3534 """Computes the list of nodes and their attributes.
3537 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3538 result = self.rpc.call_storage_modify(self.op.node_name,
3539 self.op.storage_type, st_args,
3540 self.op.name, self.op.changes)
3541 result.Raise("Failed to modify storage unit '%s' on %s" %
3542 (self.op.name, self.op.node_name))
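# Illustrative sketch only: the set-difference check from CheckArguments
# above - any requested change whose key is not in the storage type's set of
# modifiable fields is rejected before the RPC is made.
def _ExampleUnmodifiableKeys(changes, modifiable):
  """Return the (possibly empty) set of keys that may not be changed."""
  return set(changes.keys()) - set(modifiable)

# e.g. _ExampleUnmodifiableKeys({"allocatable": False, "size": 10},
#                               ["allocatable"])
#      == set(["size"])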
3545 class LUAddNode(LogicalUnit):
3546 """Logical unit for adding node to the cluster.
3550 HTYPE = constants.HTYPE_NODE
3552 ("node_name", _TNonEmptyString),
3554 _OP_DEFS = [("secondary_ip", None)]
3556 def CheckArguments(self):
3557 # validate/normalize the node name
3558 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3560 def BuildHooksEnv(self):
3563 This will run on all nodes before, and on all nodes + the new node after.
3567 "OP_TARGET": self.op.node_name,
3568 "NODE_NAME": self.op.node_name,
3569 "NODE_PIP": self.op.primary_ip,
3570 "NODE_SIP": self.op.secondary_ip,
3572 nodes_0 = self.cfg.GetNodeList()
3573 nodes_1 = nodes_0 + [self.op.node_name, ]
3574 return env, nodes_0, nodes_1
3576 def CheckPrereq(self):
3577 """Check prerequisites.
3580 - the new node is not already in the config
3582 - its parameters (single/dual homed) match the cluster
3584 Any errors are signaled by raising errors.OpPrereqError.
3587 node_name = self.op.node_name
3590 dns_data = utils.GetHostInfo(node_name)
3592 node = dns_data.name
3593 primary_ip = self.op.primary_ip = dns_data.ip
3594 if self.op.secondary_ip is None:
3595 self.op.secondary_ip = primary_ip
3596 if not utils.IsValidIP(self.op.secondary_ip):
3597 raise errors.OpPrereqError("Invalid secondary IP given",
3599 secondary_ip = self.op.secondary_ip
3601 node_list = cfg.GetNodeList()
3602 if not self.op.readd and node in node_list:
3603 raise errors.OpPrereqError("Node %s is already in the configuration" %
3604 node, errors.ECODE_EXISTS)
3605 elif self.op.readd and node not in node_list:
3606 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3609 self.changed_primary_ip = False
3611 for existing_node_name in node_list:
3612 existing_node = cfg.GetNodeInfo(existing_node_name)
3614 if self.op.readd and node == existing_node_name:
3615 if existing_node.secondary_ip != secondary_ip:
3616 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3617 " address configuration as before",
3619 if existing_node.primary_ip != primary_ip:
3620 self.changed_primary_ip = True
3624 if (existing_node.primary_ip == primary_ip or
3625 existing_node.secondary_ip == primary_ip or
3626 existing_node.primary_ip == secondary_ip or
3627 existing_node.secondary_ip == secondary_ip):
3628 raise errors.OpPrereqError("New node ip address(es) conflict with"
3629 " existing node %s" % existing_node.name,
3630 errors.ECODE_NOTUNIQUE)
3632 # check that the type of the node (single versus dual homed) is the
3633 # same as for the master
3634 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3635 master_singlehomed = myself.secondary_ip == myself.primary_ip
3636 newbie_singlehomed = secondary_ip == primary_ip
3637 if master_singlehomed != newbie_singlehomed:
3638 if master_singlehomed:
3639 raise errors.OpPrereqError("The master has no private ip but the"
3640 " new node has one",
3643 raise errors.OpPrereqError("The master has a private ip but the"
3644 " new node doesn't have one",
3647 # checks reachability
3648 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3649 raise errors.OpPrereqError("Node not reachable by ping",
3650 errors.ECODE_ENVIRON)
3652 if not newbie_singlehomed:
3653 # check reachability from my secondary ip to newbie's secondary ip
3654 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3655 source=myself.secondary_ip):
3656 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3657 " based ping to noded port",
3658 errors.ECODE_ENVIRON)
3665 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3668 self.new_node = self.cfg.GetNodeInfo(node)
3669 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3671 self.new_node = objects.Node(name=node,
3672 primary_ip=primary_ip,
3673 secondary_ip=secondary_ip,
3674 master_candidate=self.master_candidate,
3675 offline=False, drained=False)
3677 def Exec(self, feedback_fn):
3678 """Adds the new node to the cluster.
3681 new_node = self.new_node
3682 node = new_node.name
3684 # for re-adds, reset the offline/drained/master-candidate flags;
3685 # we need to reset here, otherwise offline would prevent RPC calls
3686 # later in the procedure; this also means that if the re-add
3687 # fails, we are left with a non-offlined, broken node
3689 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3690 self.LogInfo("Readding a node, the offline/drained flags were reset")
3691 # if we demote the node, we do cleanup later in the procedure
3692 new_node.master_candidate = self.master_candidate
3693 if self.changed_primary_ip:
3694 new_node.primary_ip = self.op.primary_ip
3696 # notify the user about any possible mc promotion
3697 if new_node.master_candidate:
3698 self.LogInfo("Node will be a master candidate")
3700 # check connectivity
3701 result = self.rpc.call_version([node])[node]
3702 result.Raise("Can't get version information from node %s" % node)
3703 if constants.PROTOCOL_VERSION == result.payload:
3704 logging.info("Communication to node %s fine, sw version %s match",
3705 node, result.payload)
3707 raise errors.OpExecError("Version mismatch master version %s,"
3708 " node version %s" %
3709 (constants.PROTOCOL_VERSION, result.payload))
3712 if self.cfg.GetClusterInfo().modify_ssh_setup:
3713 logging.info("Copy ssh key to node %s", node)
3714 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3716 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3717 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3721 keyarray.append(utils.ReadFile(i))
3723 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3724 keyarray[2], keyarray[3], keyarray[4],
3726 result.Raise("Cannot transfer ssh keys to the new node")
3728 # Add node to our /etc/hosts, and add key to known_hosts
3729 if self.cfg.GetClusterInfo().modify_etc_hosts:
3730 # FIXME: this should be done via an rpc call to node daemon
3731 utils.AddHostToEtcHosts(new_node.name)
3733 if new_node.secondary_ip != new_node.primary_ip:
3734 result = self.rpc.call_node_has_ip_address(new_node.name,
3735 new_node.secondary_ip)
3736 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3737 prereq=True, ecode=errors.ECODE_ENVIRON)
3738 if not result.payload:
3739 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3740 " you gave (%s). Please fix and re-run this"
3741 " command." % new_node.secondary_ip)
3743 node_verify_list = [self.cfg.GetMasterNode()]
3744 node_verify_param = {
3745 constants.NV_NODELIST: [node],
3746 # TODO: do a node-net-test as well?
3749 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3750 self.cfg.GetClusterName())
3751 for verifier in node_verify_list:
3752 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3753 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3755 for failed in nl_payload:
3756 feedback_fn("ssh/hostname verification failed"
3757 " (checking from %s): %s" %
3758 (verifier, nl_payload[failed]))
3759 raise errors.OpExecError("ssh/hostname verification failed.")
3762 _RedistributeAncillaryFiles(self)
3763 self.context.ReaddNode(new_node)
3764 # make sure we redistribute the config
3765 self.cfg.Update(new_node, feedback_fn)
3766 # and make sure the new node will not have old files around
3767 if not new_node.master_candidate:
3768 result = self.rpc.call_node_demote_from_mc(new_node.name)
3769 msg = result.fail_msg
3771 self.LogWarning("Node failed to demote itself from master"
3772 " candidate status: %s" % msg)
3774 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3775 self.context.AddNode(new_node, self.proc.GetECId())
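# Illustrative sketch only (plain tuples instead of node objects): the two
# address checks made by LUAddNode.CheckPrereq above - the new node's primary
# and secondary IPs must not collide with any existing node, and its homing
# (secondary IP equal to the primary or not) must match the master's.
def _ExampleCheckNewNodeIps(new_primary, new_secondary, existing_nodes,
                            master_singlehomed):
  """@param existing_nodes: list of (primary_ip, secondary_ip) tuples

  @return: None if everything is fine, otherwise an error string

  """
  for prim, sec in existing_nodes:
    if new_primary in (prim, sec) or new_secondary in (prim, sec):
      return "new node ip address(es) conflict with an existing node"
  if (new_primary == new_secondary) != master_singlehomed:
    return "single/dual-homed setting differs from the master's"
  return None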
3778 class LUSetNodeParams(LogicalUnit):
3779 """Modifies the parameters of a node.
3782 HPATH = "node-modify"
3783 HTYPE = constants.HTYPE_NODE
3784 _OP_REQP = [("node_name", _TNonEmptyString)]
3787 def CheckArguments(self):
3788 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3789 _CheckBooleanOpField(self.op, 'master_candidate')
3790 _CheckBooleanOpField(self.op, 'offline')
3791 _CheckBooleanOpField(self.op, 'drained')
3792 _CheckBooleanOpField(self.op, 'auto_promote')
3793 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3794 if all_mods.count(None) == 3:
3795 raise errors.OpPrereqError("Please pass at least one modification",
3797 if all_mods.count(True) > 1:
3798 raise errors.OpPrereqError("Can't set the node into more than one"
3799 " state at the same time",
3802 # Boolean value that tells us whether we're offlining or draining the node
3803 self.offline_or_drain = (self.op.offline == True or
3804 self.op.drained == True)
3805 self.deoffline_or_drain = (self.op.offline == False or
3806 self.op.drained == False)
3807 self.might_demote = (self.op.master_candidate == False or
3808 self.offline_or_drain)
3810 self.lock_all = self.op.auto_promote and self.might_demote
3813 def ExpandNames(self):
3815 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3817 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3819 def BuildHooksEnv(self):
3822 This runs on the master node.
3826 "OP_TARGET": self.op.node_name,
3827 "MASTER_CANDIDATE": str(self.op.master_candidate),
3828 "OFFLINE": str(self.op.offline),
3829 "DRAINED": str(self.op.drained),
3831 nl = [self.cfg.GetMasterNode(),
3835 def CheckPrereq(self):
3836 """Check prerequisites.
3838 This only checks the instance list against the existing names.
3841 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3843 if (self.op.master_candidate is not None or
3844 self.op.drained is not None or
3845 self.op.offline is not None):
3846 # we can't change the master's node flags
3847 if self.op.node_name == self.cfg.GetMasterNode():
3848 raise errors.OpPrereqError("The master role can be changed"
3849 " only via masterfailover",
3853 if node.master_candidate and self.might_demote and not self.lock_all:
3854 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3855 # check if after removing the current node, we're missing master candidates
3857 (mc_remaining, mc_should, _) = \
3858 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3859 if mc_remaining < mc_should:
3860 raise errors.OpPrereqError("Not enough master candidates, please"
3861 " pass auto_promote to allow promotion",
3864 if (self.op.master_candidate == True and
3865 ((node.offline and not self.op.offline == False) or
3866 (node.drained and not self.op.drained == False))):
3867 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3868 " to master_candidate" % node.name,
3871 # If we're being deofflined/drained, we'll MC ourself if needed
3872 if (self.deoffline_or_drain and not self.offline_or_drain and not
3873 self.op.master_candidate == True and not node.master_candidate):
3874 self.op.master_candidate = _DecideSelfPromotion(self)
3875 if self.op.master_candidate:
3876 self.LogInfo("Autopromoting node to master candidate")
3880 def Exec(self, feedback_fn):
3889 if self.op.offline is not None:
3890 node.offline = self.op.offline
3891 result.append(("offline", str(self.op.offline)))
3892 if self.op.offline == True:
3893 if node.master_candidate:
3894 node.master_candidate = False
3896 result.append(("master_candidate", "auto-demotion due to offline"))
3898 node.drained = False
3899 result.append(("drained", "clear drained status due to offline"))
3901 if self.op.master_candidate is not None:
3902 node.master_candidate = self.op.master_candidate
3904 result.append(("master_candidate", str(self.op.master_candidate)))
3905 if self.op.master_candidate == False:
3906 rrc = self.rpc.call_node_demote_from_mc(node.name)
3909 self.LogWarning("Node failed to demote itself: %s" % msg)
3911 if self.op.drained is not None:
3912 node.drained = self.op.drained
3913 result.append(("drained", str(self.op.drained)))
3914 if self.op.drained == True:
3915 if node.master_candidate:
3916 node.master_candidate = False
3918 result.append(("master_candidate", "auto-demotion due to drain"))
3919 rrc = self.rpc.call_node_demote_from_mc(node.name)
3922 self.LogWarning("Node failed to demote itself: %s" % msg)
3924 node.offline = False
3925 result.append(("offline", "clear offline status due to drain"))
3927 # we locked all nodes, so adjust the candidate pool before updating this node
3929 _AdjustCandidatePool(self, [node.name])
3931 # this will trigger configuration file update, if needed
3932 self.cfg.Update(node, feedback_fn)
3934 # this will trigger job queue propagation or cleanup
3936 self.context.ReaddNode(node)
3941 class LUPowercycleNode(NoHooksLU):
3942 """Powercycles a node.
3946 ("node_name", _TNonEmptyString),
3951 def CheckArguments(self):
3952 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3953 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3954 raise errors.OpPrereqError("The node is the master and the force"
3955 " parameter was not set",
3958 def ExpandNames(self):
3959 """Locking for PowercycleNode.
3961 This is a last-resort option and shouldn't block on other
3962 jobs. Therefore, we grab no locks.
3965 self.needed_locks = {}
3967 def Exec(self, feedback_fn):
3971 result = self.rpc.call_node_powercycle(self.op.node_name,
3972 self.cfg.GetHypervisorType())
3973 result.Raise("Failed to schedule the reboot")
3974 return result.payload
3977 class LUQueryClusterInfo(NoHooksLU):
3978 """Query cluster configuration.
3984 def ExpandNames(self):
3985 self.needed_locks = {}
3987 def Exec(self, feedback_fn):
3988 """Return cluster config.
3991 cluster = self.cfg.GetClusterInfo()
3994 # Filter just for enabled hypervisors
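# (illustrative) the result maps OS name -> hypervisor -> parameters, keeping
# only hypervisors enabled on the cluster, e.g.
#   {"debian-image": {"xen-pvm": {...}}}
# the names above are hypothetical.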
3995 for os_name, hv_dict in cluster.os_hvp.items():
3996 os_hvp[os_name] = {}
3997 for hv_name, hv_params in hv_dict.items():
3998 if hv_name in cluster.enabled_hypervisors:
3999 os_hvp[os_name][hv_name] = hv_params
4002 "software_version": constants.RELEASE_VERSION,
4003 "protocol_version": constants.PROTOCOL_VERSION,
4004 "config_version": constants.CONFIG_VERSION,
4005 "os_api_version": max(constants.OS_API_VERSIONS),
4006 "export_version": constants.EXPORT_VERSION,
4007 "architecture": (platform.architecture()[0], platform.machine()),
4008 "name": cluster.cluster_name,
4009 "master": cluster.master_node,
4010 "default_hypervisor": cluster.enabled_hypervisors[0],
4011 "enabled_hypervisors": cluster.enabled_hypervisors,
4012 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4013 for hypervisor_name in cluster.enabled_hypervisors]),
4015 "beparams": cluster.beparams,
4016 "osparams": cluster.osparams,
4017 "nicparams": cluster.nicparams,
4018 "candidate_pool_size": cluster.candidate_pool_size,
4019 "master_netdev": cluster.master_netdev,
4020 "volume_group_name": cluster.volume_group_name,
4021 "file_storage_dir": cluster.file_storage_dir,
4022 "maintain_node_health": cluster.maintain_node_health,
4023 "ctime": cluster.ctime,
4024 "mtime": cluster.mtime,
4025 "uuid": cluster.uuid,
4026 "tags": list(cluster.GetTags()),
4027 "uid_pool": cluster.uid_pool,
4033 class LUQueryConfigValues(NoHooksLU):
4034 """Return configuration values.
4039 _FIELDS_DYNAMIC = utils.FieldSet()
4040 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4043 def CheckArguments(self):
4044 _CheckOutputFields(static=self._FIELDS_STATIC,
4045 dynamic=self._FIELDS_DYNAMIC,
4046 selected=self.op.output_fields)
4048 def ExpandNames(self):
4049 self.needed_locks = {}
4051 def Exec(self, feedback_fn):
4052 """Dump a representation of the cluster config to the standard output.
4056 for field in self.op.output_fields:
4057 if field == "cluster_name":
4058 entry = self.cfg.GetClusterName()
4059 elif field == "master_node":
4060 entry = self.cfg.GetMasterNode()
4061 elif field == "drain_flag":
4062 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4063 elif field == "watcher_pause":
4064 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4066 raise errors.ParameterError(field)
4067 values.append(entry)
4071 class LUActivateInstanceDisks(NoHooksLU):
4072 """Bring up an instance's disks.
4075 _OP_REQP = [("instance_name", _TNonEmptyString)]
4076 _OP_DEFS = [("ignore_size", False)]
4079 def ExpandNames(self):
4080 self._ExpandAndLockInstance()
4081 self.needed_locks[locking.LEVEL_NODE] = []
4082 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4084 def DeclareLocks(self, level):
4085 if level == locking.LEVEL_NODE:
4086 self._LockInstancesNodes()
4088 def CheckPrereq(self):
4089 """Check prerequisites.
4091 This checks that the instance is in the cluster.
4094 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4095 assert self.instance is not None, \
4096 "Cannot retrieve locked instance %s" % self.op.instance_name
4097 _CheckNodeOnline(self, self.instance.primary_node)
4099 def Exec(self, feedback_fn):
4100 """Activate the disks.
4103 disks_ok, disks_info = \
4104 _AssembleInstanceDisks(self, self.instance,
4105 ignore_size=self.op.ignore_size)
4107 raise errors.OpExecError("Cannot activate block devices")
4112 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4114 """Prepare the block devices for an instance.
4116 This sets up the block devices on all nodes.
4118 @type lu: L{LogicalUnit}
4119 @param lu: the logical unit on whose behalf we execute
4120 @type instance: L{objects.Instance}
4121 @param instance: the instance for whose disks we assemble
4122 @type disks: list of L{objects.Disk} or None
4123 @param disks: which disks to assemble (or all, if None)
4124 @type ignore_secondaries: boolean
4125 @param ignore_secondaries: if true, errors on secondary nodes
4126 won't result in an error return from the function
4127 @type ignore_size: boolean
4128 @param ignore_size: if true, the current known size of the disk
4129 will not be used during the disk activation, useful for cases
4130 when the size is wrong
4131 @return: False if the operation failed, otherwise a list of
4132 (host, instance_visible_name, node_visible_name)
4133 with the mapping from node devices to instance devices
4138 iname = instance.name
4139 disks = _ExpandCheckDisks(instance, disks)
4141 # With the two-pass mechanism we try to reduce the window of
4142 # opportunity for the race condition of switching DRBD to primary
4143 # before the handshake has occurred, but we do not eliminate it
4145 # The proper fix would be to wait (with some limits) until the
4146 # connection has been made and drbd transitions from WFConnection
4147 # into any other network-connected state (Connected, SyncTarget,
4150 # 1st pass, assemble on all nodes in secondary mode
4151 for inst_disk in disks:
4152 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4154 node_disk = node_disk.Copy()
4155 node_disk.UnsetSize()
4156 lu.cfg.SetDiskID(node_disk, node)
4157 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4158 msg = result.fail_msg
4160 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4161 " (is_primary=False, pass=1): %s",
4162 inst_disk.iv_name, node, msg)
4163 if not ignore_secondaries:
4166 # FIXME: race condition on drbd migration to primary
4168 # 2nd pass, do only the primary node
4169 for inst_disk in disks:
4172 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4173 if node != instance.primary_node:
4176 node_disk = node_disk.Copy()
4177 node_disk.UnsetSize()
4178 lu.cfg.SetDiskID(node_disk, node)
4179 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4180 msg = result.fail_msg
4182 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4183 " (is_primary=True, pass=2): %s",
4184 inst_disk.iv_name, node, msg)
4187 dev_path = result.payload
4189 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
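# each entry is (node, instance-visible disk name, device path); an
# illustrative value would be ("node1.example.com", "disk/0", "/dev/drbd0")
# (hypothetical names)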
4191 # leave the disks configured for the primary node
4192 # this is a workaround that would be better fixed by
4193 # improving the logical/physical id handling
4195 lu.cfg.SetDiskID(disk, instance.primary_node)
4197 return disks_ok, device_info
4200 def _StartInstanceDisks(lu, instance, force):
4201 """Start the disks of an instance.
4204 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4205 ignore_secondaries=force)
4207 _ShutdownInstanceDisks(lu, instance)
4208 if force is not None and not force:
4209 lu.proc.LogWarning("", hint="If the message above refers to a"
4211 " you can retry the operation using '--force'.")
4212 raise errors.OpExecError("Disk consistency error")
4215 class LUDeactivateInstanceDisks(NoHooksLU):
4216 """Shutdown an instance's disks.
4219 _OP_REQP = [("instance_name", _TNonEmptyString)]
4222 def ExpandNames(self):
4223 self._ExpandAndLockInstance()
4224 self.needed_locks[locking.LEVEL_NODE] = []
4225 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4227 def DeclareLocks(self, level):
4228 if level == locking.LEVEL_NODE:
4229 self._LockInstancesNodes()
4231 def CheckPrereq(self):
4232 """Check prerequisites.
4234 This checks that the instance is in the cluster.
4237 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4238 assert self.instance is not None, \
4239 "Cannot retrieve locked instance %s" % self.op.instance_name
4241 def Exec(self, feedback_fn):
4242 """Deactivate the disks
4245 instance = self.instance
4246 _SafeShutdownInstanceDisks(self, instance)
4249 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4250 """Shutdown block devices of an instance.
4252 This function checks that the instance is not running, before calling
4253 _ShutdownInstanceDisks.
4256 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4257 _ShutdownInstanceDisks(lu, instance, disks=disks)
4260 def _ExpandCheckDisks(instance, disks):
4261 """Return the instance disks selected by the disks list
4263 @type disks: list of L{objects.Disk} or None
4264 @param disks: selected disks
4265 @rtype: list of L{objects.Disk}
4266 @return: selected instance disks to act on
4270 return instance.disks
4272 if not set(disks).issubset(instance.disks):
4273 raise errors.ProgrammerError("Can only act on disks belonging to the"
4278 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4279 """Shutdown block devices of an instance.
4281 This does the shutdown on all nodes of the instance.
4283 Errors on the primary node are ignored only if ignore_primary is true.
4288 disks = _ExpandCheckDisks(instance, disks)
4291 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4292 lu.cfg.SetDiskID(top_disk, node)
4293 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4294 msg = result.fail_msg
4296 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4297 disk.iv_name, node, msg)
4298 if not ignore_primary or node != instance.primary_node:
4303 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4304 """Checks if a node has enough free memory.
4306 This function checks if a given node has the needed amount of free
4307 memory. In case the node has less memory or we cannot get the
4308 information from the node, this function raises an OpPrereqError
4311 @type lu: C{LogicalUnit}
4312 @param lu: a logical unit from which we get configuration data
4314 @param node: the node to check
4315 @type reason: C{str}
4316 @param reason: string to use in the error message
4317 @type requested: C{int}
4318 @param requested: the amount of memory in MiB to check for
4319 @type hypervisor_name: C{str}
4320 @param hypervisor_name: the hypervisor to ask for memory stats
4321 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4322 we cannot check the node
4325 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4326 nodeinfo[node].Raise("Can't get data from node %s" % node,
4327 prereq=True, ecode=errors.ECODE_ENVIRON)
4328 free_mem = nodeinfo[node].payload.get('memory_free', None)
4329 if not isinstance(free_mem, int):
4330 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4331 " was '%s'" % (node, free_mem),
4332 errors.ECODE_ENVIRON)
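# both values are in MiB; e.g. (illustrative) requesting 4096 MiB when the
# node reports only 2048 MiB free triggers the error below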
4333 if requested > free_mem:
4334 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4335 " needed %s MiB, available %s MiB" %
4336 (node, reason, requested, free_mem),
4340 def _CheckNodesFreeDisk(lu, nodenames, requested):
4341 """Checks if nodes have enough free disk space in the default VG.
4343 This function checks if all given nodes have the needed amount of
4344 free disk. In case any node has less disk or we cannot get the
4345 information from the node, this function raises an OpPrereqError
4348 @type lu: C{LogicalUnit}
4349 @param lu: a logical unit from which we get configuration data
4350 @type nodenames: C{list}
4351 @param nodenames: the list of node names to check
4352 @type requested: C{int}
4353 @param requested: the amount of disk in MiB to check for
4354 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4355 we cannot check the node
4358 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4359 lu.cfg.GetHypervisorType())
4360 for node in nodenames:
4361 info = nodeinfo[node]
4362 info.Raise("Cannot get current information from node %s" % node,
4363 prereq=True, ecode=errors.ECODE_ENVIRON)
4364 vg_free = info.payload.get("vg_free", None)
4365 if not isinstance(vg_free, int):
4366 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4367 " result was '%s'" % (node, vg_free),
4368 errors.ECODE_ENVIRON)
4369 if requested > vg_free:
4370 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4371 " required %d MiB, available %d MiB" %
4372 (node, requested, vg_free),
4376 class LUStartupInstance(LogicalUnit):
4377 """Starts an instance.
4380 HPATH = "instance-start"
4381 HTYPE = constants.HTYPE_INSTANCE
4383 ("instance_name", _TNonEmptyString),
4385 ("beparams", _TDict),
4386 ("hvparams", _TDict),
4389 ("beparams", _EmptyDict),
4390 ("hvparams", _EmptyDict),
4394 def CheckArguments(self):
4396 if self.op.beparams:
4397 # fill the beparams dict
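# (illustrative) a one-off override such as {"memory": 2048} is type-checked
# and coerced here; it applies only to this start, not to the stored config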
4398 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4400 def ExpandNames(self):
4401 self._ExpandAndLockInstance()
4403 def BuildHooksEnv(self):
4406 This runs on master, primary and secondary nodes of the instance.
4410 "FORCE": self.op.force,
4412 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4413 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4416 def CheckPrereq(self):
4417 """Check prerequisites.
4419 This checks that the instance is in the cluster.
4422 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4423 assert self.instance is not None, \
4424 "Cannot retrieve locked instance %s" % self.op.instance_name
4427 if self.op.hvparams:
4428 # check hypervisor parameter syntax (locally)
4429 cluster = self.cfg.GetClusterInfo()
4430 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4431 filled_hvp = cluster.FillHV(instance)
4432 filled_hvp.update(self.op.hvparams)
4433 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4434 hv_type.CheckParameterSyntax(filled_hvp)
4435 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
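# note: the temporary hvparams are layered on top of the cluster-filled
# values, checked syntactically here and then validated on the instance's
# nodes as well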
4437 _CheckNodeOnline(self, instance.primary_node)
4439 bep = self.cfg.GetClusterInfo().FillBE(instance)
4440 # check bridges existence
4441 _CheckInstanceBridgesExist(self, instance)
4443 remote_info = self.rpc.call_instance_info(instance.primary_node,
4445 instance.hypervisor)
4446 remote_info.Raise("Error checking node %s" % instance.primary_node,
4447 prereq=True, ecode=errors.ECODE_ENVIRON)
4448 if not remote_info.payload: # not running already
4449 _CheckNodeFreeMemory(self, instance.primary_node,
4450 "starting instance %s" % instance.name,
4451 bep[constants.BE_MEMORY], instance.hypervisor)
4453 def Exec(self, feedback_fn):
4454 """Start the instance.
4457 instance = self.instance
4458 force = self.op.force
4460 self.cfg.MarkInstanceUp(instance.name)
4462 node_current = instance.primary_node
4464 _StartInstanceDisks(self, instance, force)
4466 result = self.rpc.call_instance_start(node_current, instance,
4467 self.op.hvparams, self.op.beparams)
4468 msg = result.fail_msg
4470 _ShutdownInstanceDisks(self, instance)
4471 raise errors.OpExecError("Could not start instance: %s" % msg)
4474 class LURebootInstance(LogicalUnit):
4475 """Reboot an instance.
4478 HPATH = "instance-reboot"
4479 HTYPE = constants.HTYPE_INSTANCE
4481 ("instance_name", _TNonEmptyString),
4482 ("ignore_secondaries", _TBool),
4483 ("reboot_type", _TElemOf(constants.REBOOT_TYPES)),
4485 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4488 def ExpandNames(self):
4489 self._ExpandAndLockInstance()
4491 def BuildHooksEnv(self):
4494 This runs on master, primary and secondary nodes of the instance.
4498 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4499 "REBOOT_TYPE": self.op.reboot_type,
4500 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4502 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4503 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4506 def CheckPrereq(self):
4507 """Check prerequisites.
4509 This checks that the instance is in the cluster.
4512 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4513 assert self.instance is not None, \
4514 "Cannot retrieve locked instance %s" % self.op.instance_name
4516 _CheckNodeOnline(self, instance.primary_node)
4518 # check bridges existence
4519 _CheckInstanceBridgesExist(self, instance)
4521 def Exec(self, feedback_fn):
4522 """Reboot the instance.
4525 instance = self.instance
4526 ignore_secondaries = self.op.ignore_secondaries
4527 reboot_type = self.op.reboot_type
4529 node_current = instance.primary_node
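# soft/hard reboots are delegated to the hypervisor on the primary node;
# a full reboot is implemented below as shutdown + disk re-assembly + start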
4531 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4532 constants.INSTANCE_REBOOT_HARD]:
4533 for disk in instance.disks:
4534 self.cfg.SetDiskID(disk, node_current)
4535 result = self.rpc.call_instance_reboot(node_current, instance,
4537 self.op.shutdown_timeout)
4538 result.Raise("Could not reboot instance")
4540 result = self.rpc.call_instance_shutdown(node_current, instance,
4541 self.op.shutdown_timeout)
4542 result.Raise("Could not shutdown instance for full reboot")
4543 _ShutdownInstanceDisks(self, instance)
4544 _StartInstanceDisks(self, instance, ignore_secondaries)
4545 result = self.rpc.call_instance_start(node_current, instance, None, None)
4546 msg = result.fail_msg
4548 _ShutdownInstanceDisks(self, instance)
4549 raise errors.OpExecError("Could not start instance for"
4550 " full reboot: %s" % msg)
4552 self.cfg.MarkInstanceUp(instance.name)
4555 class LUShutdownInstance(LogicalUnit):
4556 """Shutdown an instance.
4559 HPATH = "instance-stop"
4560 HTYPE = constants.HTYPE_INSTANCE
4561 _OP_REQP = [("instance_name", _TNonEmptyString)]
4562 _OP_DEFS = [("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4565 def ExpandNames(self):
4566 self._ExpandAndLockInstance()
4568 def BuildHooksEnv(self):
4571 This runs on master, primary and secondary nodes of the instance.
4574 env = _BuildInstanceHookEnvByObject(self, self.instance)
4575 env["TIMEOUT"] = self.op.timeout
4576 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4579 def CheckPrereq(self):
4580 """Check prerequisites.
4582 This checks that the instance is in the cluster.
4585 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4586 assert self.instance is not None, \
4587 "Cannot retrieve locked instance %s" % self.op.instance_name
4588 _CheckNodeOnline(self, self.instance.primary_node)
4590 def Exec(self, feedback_fn):
4591 """Shutdown the instance.
4594 instance = self.instance
4595 node_current = instance.primary_node
4596 timeout = self.op.timeout
4597 self.cfg.MarkInstanceDown(instance.name)
4598 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4599 msg = result.fail_msg
4601 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4603 _ShutdownInstanceDisks(self, instance)
4606 class LUReinstallInstance(LogicalUnit):
4607 """Reinstall an instance.
4610 HPATH = "instance-reinstall"
4611 HTYPE = constants.HTYPE_INSTANCE
4612 _OP_REQP = [("instance_name", _TNonEmptyString)]
4615 ("force_variant", False),
4619 def ExpandNames(self):
4620 self._ExpandAndLockInstance()
4622 def BuildHooksEnv(self):
4625 This runs on master, primary and secondary nodes of the instance.
4628 env = _BuildInstanceHookEnvByObject(self, self.instance)
4629 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4632 def CheckPrereq(self):
4633 """Check prerequisites.
4635 This checks that the instance is in the cluster and is not running.
4638 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4639 assert instance is not None, \
4640 "Cannot retrieve locked instance %s" % self.op.instance_name
4641 _CheckNodeOnline(self, instance.primary_node)
4643 if instance.disk_template == constants.DT_DISKLESS:
4644 raise errors.OpPrereqError("Instance '%s' has no disks" %
4645 self.op.instance_name,
4647 _CheckInstanceDown(self, instance, "cannot reinstall")
4649 if self.op.os_type is not None:
4651 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4652 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4654 self.instance = instance
4656 def Exec(self, feedback_fn):
4657 """Reinstall the instance.
4660 inst = self.instance
4662 if self.op.os_type is not None:
4663 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4664 inst.os = self.op.os_type
4665 self.cfg.Update(inst, feedback_fn)
4667 _StartInstanceDisks(self, inst, None)
4669 feedback_fn("Running the instance OS create scripts...")
4670 # FIXME: pass debug option from opcode to backend
4671 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4672 self.op.debug_level)
4673 result.Raise("Could not install OS for instance %s on node %s" %
4674 (inst.name, inst.primary_node))
4676 _ShutdownInstanceDisks(self, inst)
4679 class LURecreateInstanceDisks(LogicalUnit):
4680 """Recreate an instance's missing disks.
4683 HPATH = "instance-recreate-disks"
4684 HTYPE = constants.HTYPE_INSTANCE
4686 ("instance_name", _TNonEmptyString),
4687 ("disks", _TListOf(_TPositiveInt)),
4691 def ExpandNames(self):
4692 self._ExpandAndLockInstance()
4694 def BuildHooksEnv(self):
4697 This runs on master, primary and secondary nodes of the instance.
4700 env = _BuildInstanceHookEnvByObject(self, self.instance)
4701 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4704 def CheckPrereq(self):
4705 """Check prerequisites.
4707 This checks that the instance is in the cluster and is not running.
4710 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4711 assert instance is not None, \
4712 "Cannot retrieve locked instance %s" % self.op.instance_name
4713 _CheckNodeOnline(self, instance.primary_node)
4715 if instance.disk_template == constants.DT_DISKLESS:
4716 raise errors.OpPrereqError("Instance '%s' has no disks" %
4717 self.op.instance_name, errors.ECODE_INVAL)
4718 _CheckInstanceDown(self, instance, "cannot recreate disks")
4720 if not self.op.disks:
4721 self.op.disks = range(len(instance.disks))
4723 for idx in self.op.disks:
4724 if idx >= len(instance.disks):
4725 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4728 self.instance = instance
4730 def Exec(self, feedback_fn):
4731 """Recreate the disks.
4735 for idx, _ in enumerate(self.instance.disks):
4736 if idx not in self.op.disks: # disk idx has not been passed in
4740 _CreateDisks(self, self.instance, to_skip=to_skip)
4743 class LURenameInstance(LogicalUnit):
4744 """Rename an instance.
4747 HPATH = "instance-rename"
4748 HTYPE = constants.HTYPE_INSTANCE
4750 ("instance_name", _TNonEmptyString),
4751 ("new_name", _TNonEmptyString),
4753 _OP_DEFS = [("ignore_ip", False)]
4755 def BuildHooksEnv(self):
4758 This runs on master, primary and secondary nodes of the instance.
4761 env = _BuildInstanceHookEnvByObject(self, self.instance)
4762 env["INSTANCE_NEW_NAME"] = self.op.new_name
4763 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4766 def CheckPrereq(self):
4767 """Check prerequisites.
4769 This checks that the instance is in the cluster and is not running.
4772 self.op.instance_name = _ExpandInstanceName(self.cfg,
4773 self.op.instance_name)
4774 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4775 assert instance is not None
4776 _CheckNodeOnline(self, instance.primary_node)
4777 _CheckInstanceDown(self, instance, "cannot rename")
4778 self.instance = instance
4780 # new name verification
4781 name_info = utils.GetHostInfo(self.op.new_name)
4783 self.op.new_name = new_name = name_info.name
4784 instance_list = self.cfg.GetInstanceList()
4785 if new_name in instance_list:
4786 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4787 new_name, errors.ECODE_EXISTS)
4789 if not self.op.ignore_ip:
4790 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4791 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4792 (name_info.ip, new_name),
4793 errors.ECODE_NOTUNIQUE)
4795 def Exec(self, feedback_fn):
4796 """Reinstall the instance.
4799 inst = self.instance
4800 old_name = inst.name
4802 if inst.disk_template == constants.DT_FILE:
4803 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4805 self.cfg.RenameInstance(inst.name, self.op.new_name)
4806 # Change the instance lock. This is definitely safe while we hold the BGL
4807 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4808 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4810 # re-read the instance from the configuration after rename
4811 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4813 if inst.disk_template == constants.DT_FILE:
4814 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4815 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4816 old_file_storage_dir,
4817 new_file_storage_dir)
4818 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4819 " (but the instance has been renamed in Ganeti)" %
4820 (inst.primary_node, old_file_storage_dir,
4821 new_file_storage_dir))
4823 _StartInstanceDisks(self, inst, None)
4825 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4826 old_name, self.op.debug_level)
4827 msg = result.fail_msg
4829 msg = ("Could not run OS rename script for instance %s on node %s"
4830 " (but the instance has been renamed in Ganeti): %s" %
4831 (inst.name, inst.primary_node, msg))
4832 self.proc.LogWarning(msg)
4834 _ShutdownInstanceDisks(self, inst)
4837 class LURemoveInstance(LogicalUnit):
4838 """Remove an instance.
4841 HPATH = "instance-remove"
4842 HTYPE = constants.HTYPE_INSTANCE
4844 ("instance_name", _TNonEmptyString),
4845 ("ignore_failures", _TBool),
4847 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4850 def ExpandNames(self):
4851 self._ExpandAndLockInstance()
4852 self.needed_locks[locking.LEVEL_NODE] = []
4853 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4855 def DeclareLocks(self, level):
4856 if level == locking.LEVEL_NODE:
4857 self._LockInstancesNodes()
4859 def BuildHooksEnv(self):
4862 This runs on master, primary and secondary nodes of the instance.
4865 env = _BuildInstanceHookEnvByObject(self, self.instance)
4866 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4867 nl = [self.cfg.GetMasterNode()]
4868 nl_post = list(self.instance.all_nodes) + nl
4869 return env, nl, nl_post
4871 def CheckPrereq(self):
4872 """Check prerequisites.
4874 This checks that the instance is in the cluster.
4877 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4878 assert self.instance is not None, \
4879 "Cannot retrieve locked instance %s" % self.op.instance_name
4881 def Exec(self, feedback_fn):
4882 """Remove the instance.
4885 instance = self.instance
4886 logging.info("Shutting down instance %s on node %s",
4887 instance.name, instance.primary_node)
4889 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4890 self.op.shutdown_timeout)
4891 msg = result.fail_msg
4893 if self.op.ignore_failures:
4894 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4896 raise errors.OpExecError("Could not shutdown instance %s on"
4898 (instance.name, instance.primary_node, msg))
4900 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4903 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4904 """Utility function to remove an instance.
4907 logging.info("Removing block devices for instance %s", instance.name)
4909 if not _RemoveDisks(lu, instance):
4910 if not ignore_failures:
4911 raise errors.OpExecError("Can't remove instance's disks")
4912 feedback_fn("Warning: can't remove instance's disks")
4914 logging.info("Removing instance %s out of cluster config", instance.name)
4916 lu.cfg.RemoveInstance(instance.name)
4918 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4919 "Instance lock removal conflict"
4921 # Remove lock for the instance
4922 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4925 class LUQueryInstances(NoHooksLU):
4926 """Logical unit for querying instances.
4929 # pylint: disable-msg=W0142
4931 ("output_fields", _TListOf(_TNonEmptyString)),
4932 ("names", _TListOf(_TNonEmptyString)),
4933 ("use_locking", _TBool),
4936 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4937 "serial_no", "ctime", "mtime", "uuid"]
4938 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4940 "disk_template", "ip", "mac", "bridge",
4941 "nic_mode", "nic_link",
4942 "sda_size", "sdb_size", "vcpus", "tags",
4943 "network_port", "beparams",
4944 r"(disk)\.(size)/([0-9]+)",
4945 r"(disk)\.(sizes)", "disk_usage",
4946 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4947 r"(nic)\.(bridge)/([0-9]+)",
4948 r"(nic)\.(macs|ips|modes|links|bridges)",
4949 r"(disk|nic)\.(count)",
4951 ] + _SIMPLE_FIELDS +
4953 for name in constants.HVS_PARAMETERS
4954 if name not in constants.HVC_GLOBALS] +
4956 for name in constants.BES_PARAMETERS])
4957 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
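# static fields can be answered from the configuration alone; dynamic fields
# (oper_state, oper_ram, status) need live data from the nodes, which is why
# do_node_query/do_locking are computed in ExpandNames below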
4960 def CheckArguments(self):
4961 _CheckOutputFields(static=self._FIELDS_STATIC,
4962 dynamic=self._FIELDS_DYNAMIC,
4963 selected=self.op.output_fields)
4965 def ExpandNames(self):
4966 self.needed_locks = {}
4967 self.share_locks[locking.LEVEL_INSTANCE] = 1
4968 self.share_locks[locking.LEVEL_NODE] = 1
4971 self.wanted = _GetWantedInstances(self, self.op.names)
4973 self.wanted = locking.ALL_SET
4975 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4976 self.do_locking = self.do_node_query and self.op.use_locking
4978 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4979 self.needed_locks[locking.LEVEL_NODE] = []
4980 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4982 def DeclareLocks(self, level):
4983 if level == locking.LEVEL_NODE and self.do_locking:
4984 self._LockInstancesNodes()
4986 def Exec(self, feedback_fn):
4987 """Computes the list of nodes and their attributes.
4990 # pylint: disable-msg=R0912
4991 # way too many branches here
4992 all_info = self.cfg.GetAllInstancesInfo()
4993 if self.wanted == locking.ALL_SET:
4994 # caller didn't specify instance names, so ordering is not important
4996 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4998 instance_names = all_info.keys()
4999 instance_names = utils.NiceSort(instance_names)
5001 # caller did specify names, so we must keep the ordering
5003 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5005 tgt_set = all_info.keys()
5006 missing = set(self.wanted).difference(tgt_set)
5008 raise errors.OpExecError("Some instances were removed before"
5009 " retrieving their data: %s" % missing)
5010 instance_names = self.wanted
5012 instance_list = [all_info[iname] for iname in instance_names]
5014 # begin data gathering
5016 nodes = frozenset([inst.primary_node for inst in instance_list])
5017 hv_list = list(set([inst.hypervisor for inst in instance_list]))
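# live data is collected per primary node and hypervisor; nodes that are
# offline or unreachable are tracked separately (off_nodes/bad_nodes below)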
5021 if self.do_node_query:
5023 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5025 result = node_data[name]
5027 # offline nodes will be in both lists
5028 off_nodes.append(name)
5030 bad_nodes.append(name)
5033 live_data.update(result.payload)
5034 # else no instance is alive
5036 live_data = dict([(name, {}) for name in instance_names])
5038 # end data gathering
5043 cluster = self.cfg.GetClusterInfo()
5044 for instance in instance_list:
5046 i_hv = cluster.FillHV(instance, skip_globals=True)
5047 i_be = cluster.FillBE(instance)
5048 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5049 for field in self.op.output_fields:
5050 st_match = self._FIELDS_STATIC.Matches(field)
5051 if field in self._SIMPLE_FIELDS:
5052 val = getattr(instance, field)
5053 elif field == "pnode":
5054 val = instance.primary_node
5055 elif field == "snodes":
5056 val = list(instance.secondary_nodes)
5057 elif field == "admin_state":
5058 val = instance.admin_up
5059 elif field == "oper_state":
5060 if instance.primary_node in bad_nodes:
5063 val = bool(live_data.get(instance.name))
5064 elif field == "status":
5065 if instance.primary_node in off_nodes:
5066 val = "ERROR_nodeoffline"
5067 elif instance.primary_node in bad_nodes:
5068 val = "ERROR_nodedown"
5070 running = bool(live_data.get(instance.name))
5072 if instance.admin_up:
5077 if instance.admin_up:
5081 elif field == "oper_ram":
5082 if instance.primary_node in bad_nodes:
5084 elif instance.name in live_data:
5085 val = live_data[instance.name].get("memory", "?")
5088 elif field == "vcpus":
5089 val = i_be[constants.BE_VCPUS]
5090 elif field == "disk_template":
5091 val = instance.disk_template
5094 val = instance.nics[0].ip
5097 elif field == "nic_mode":
5099 val = i_nicp[0][constants.NIC_MODE]
5102 elif field == "nic_link":
5104 val = i_nicp[0][constants.NIC_LINK]
5107 elif field == "bridge":
5108 if (instance.nics and
5109 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5110 val = i_nicp[0][constants.NIC_LINK]
5113 elif field == "mac":
5115 val = instance.nics[0].mac
5118 elif field == "sda_size" or field == "sdb_size":
5119 idx = ord(field[2]) - ord('a')
5121 val = instance.FindDisk(idx).size
5122 except errors.OpPrereqError:
5124 elif field == "disk_usage": # total disk usage per node
5125 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5126 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5127 elif field == "tags":
5128 val = list(instance.GetTags())
5129 elif field == "hvparams":
5131 elif (field.startswith(HVPREFIX) and
5132 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5133 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5134 val = i_hv.get(field[len(HVPREFIX):], None)
5135 elif field == "beparams":
5137 elif (field.startswith(BEPREFIX) and
5138 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5139 val = i_be.get(field[len(BEPREFIX):], None)
5140 elif st_match and st_match.groups():
5141 # matches a variable list
5142 st_groups = st_match.groups()
5143 if st_groups and st_groups[0] == "disk":
5144 if st_groups[1] == "count":
5145 val = len(instance.disks)
5146 elif st_groups[1] == "sizes":
5147 val = [disk.size for disk in instance.disks]
5148 elif st_groups[1] == "size":
5150 val = instance.FindDisk(st_groups[2]).size
5151 except errors.OpPrereqError:
5154 assert False, "Unhandled disk parameter"
5155 elif st_groups[0] == "nic":
5156 if st_groups[1] == "count":
5157 val = len(instance.nics)
5158 elif st_groups[1] == "macs":
5159 val = [nic.mac for nic in instance.nics]
5160 elif st_groups[1] == "ips":
5161 val = [nic.ip for nic in instance.nics]
5162 elif st_groups[1] == "modes":
5163 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5164 elif st_groups[1] == "links":
5165 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5166 elif st_groups[1] == "bridges":
5169 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5170 val.append(nicp[constants.NIC_LINK])
5175 nic_idx = int(st_groups[2])
5176 if nic_idx >= len(instance.nics):
5179 if st_groups[1] == "mac":
5180 val = instance.nics[nic_idx].mac
5181 elif st_groups[1] == "ip":
5182 val = instance.nics[nic_idx].ip
5183 elif st_groups[1] == "mode":
5184 val = i_nicp[nic_idx][constants.NIC_MODE]
5185 elif st_groups[1] == "link":
5186 val = i_nicp[nic_idx][constants.NIC_LINK]
5187 elif st_groups[1] == "bridge":
5188 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5189 if nic_mode == constants.NIC_MODE_BRIDGED:
5190 val = i_nicp[nic_idx][constants.NIC_LINK]
5194 assert False, "Unhandled NIC parameter"
5196 assert False, ("Declared but unhandled variable parameter '%s'" %
5199 assert False, "Declared but unhandled parameter '%s'" % field
5206 class LUFailoverInstance(LogicalUnit):
5207 """Failover an instance.
5210 HPATH = "instance-failover"
5211 HTYPE = constants.HTYPE_INSTANCE
5213 ("instance_name", _TNonEmptyString),
5214 ("ignore_consistency", _TBool),
5216 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5219 def ExpandNames(self):
5220 self._ExpandAndLockInstance()
5221 self.needed_locks[locking.LEVEL_NODE] = []
5222 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5224 def DeclareLocks(self, level):
5225 if level == locking.LEVEL_NODE:
5226 self._LockInstancesNodes()
5228 def BuildHooksEnv(self):
5231 This runs on master, primary and secondary nodes of the instance.
5234 instance = self.instance
5235 source_node = instance.primary_node
5236 target_node = instance.secondary_nodes[0]
5238 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5239 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5240 "OLD_PRIMARY": source_node,
5241 "OLD_SECONDARY": target_node,
5242 "NEW_PRIMARY": target_node,
5243 "NEW_SECONDARY": source_node,
5245 env.update(_BuildInstanceHookEnvByObject(self, instance))
5246 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5248 nl_post.append(source_node)
5249 return env, nl, nl_post
5251 def CheckPrereq(self):
5252 """Check prerequisites.
5254 This checks that the instance is in the cluster.
5257 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5258 assert self.instance is not None, \
5259 "Cannot retrieve locked instance %s" % self.op.instance_name
5261 bep = self.cfg.GetClusterInfo().FillBE(instance)
5262 if instance.disk_template not in constants.DTS_NET_MIRROR:
5263 raise errors.OpPrereqError("Instance's disk layout is not"
5264 " network mirrored, cannot failover.",
5267 secondary_nodes = instance.secondary_nodes
5268 if not secondary_nodes:
5269 raise errors.ProgrammerError("no secondary node but using "
5270 "a mirrored disk template")
5272 target_node = secondary_nodes[0]
5273 _CheckNodeOnline(self, target_node)
5274 _CheckNodeNotDrained(self, target_node)
5275 if instance.admin_up:
5276 # check memory requirements on the secondary node
5277 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5278 instance.name, bep[constants.BE_MEMORY],
5279 instance.hypervisor)
5281 self.LogInfo("Not checking memory on the secondary node as"
5282 " instance will not be started")
5284 # check bridge existence
5285 _CheckInstanceBridgesExist(self, instance, node=target_node)
5287 def Exec(self, feedback_fn):
5288 """Failover an instance.
5290 The failover is done by shutting it down on its present node and
5291 starting it on the secondary.
5294 instance = self.instance
5296 source_node = instance.primary_node
5297 target_node = instance.secondary_nodes[0]
5299 if instance.admin_up:
5300 feedback_fn("* checking disk consistency between source and target")
5301 for dev in instance.disks:
5302 # for drbd, these are drbd over lvm
5303 if not _CheckDiskConsistency(self, dev, target_node, False):
5304 if not self.op.ignore_consistency:
5305 raise errors.OpExecError("Disk %s is degraded on target node,"
5306 " aborting failover." % dev.iv_name)
5308 feedback_fn("* not checking disk consistency as instance is not running")
5310 feedback_fn("* shutting down instance on source node")
5311 logging.info("Shutting down instance %s on node %s",
5312 instance.name, source_node)
5314 result = self.rpc.call_instance_shutdown(source_node, instance,
5315 self.op.shutdown_timeout)
5316 msg = result.fail_msg
5318 if self.op.ignore_consistency:
5319 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5320 " Proceeding anyway. Please make sure node"
5321 " %s is down. Error details: %s",
5322 instance.name, source_node, source_node, msg)
5324 raise errors.OpExecError("Could not shutdown instance %s on"
5326 (instance.name, source_node, msg))
5328 feedback_fn("* deactivating the instance's disks on source node")
5329 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5330 raise errors.OpExecError("Can't shut down the instance's disks.")
5332 instance.primary_node = target_node
5333 # distribute new instance config to the other nodes
5334 self.cfg.Update(instance, feedback_fn)
5336 # Only start the instance if it's marked as up
5337 if instance.admin_up:
5338 feedback_fn("* activating the instance's disks on target node")
5339 logging.info("Starting instance %s on node %s",
5340 instance.name, target_node)
5342 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5343 ignore_secondaries=True)
5345 _ShutdownInstanceDisks(self, instance)
5346 raise errors.OpExecError("Can't activate the instance's disks")
5348 feedback_fn("* starting the instance on the target node")
5349 result = self.rpc.call_instance_start(target_node, instance, None, None)
5350 msg = result.fail_msg
5352 _ShutdownInstanceDisks(self, instance)
5353 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5354 (instance.name, target_node, msg))
5357 class LUMigrateInstance(LogicalUnit):
5358 """Migrate an instance.
5360 This is migration without shutting down, compared to the failover,
5361 which is done with shutdown.
5364 HPATH = "instance-migrate"
5365 HTYPE = constants.HTYPE_INSTANCE
5367 ("instance_name", _TNonEmptyString),
5369 ("cleanup", _TBool),
5374 def ExpandNames(self):
5375 self._ExpandAndLockInstance()
5377 self.needed_locks[locking.LEVEL_NODE] = []
5378 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5380 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5381 self.op.live, self.op.cleanup)
5382 self.tasklets = [self._migrater]
5384 def DeclareLocks(self, level):
5385 if level == locking.LEVEL_NODE:
5386 self._LockInstancesNodes()
5388 def BuildHooksEnv(self):
5391 This runs on master, primary and secondary nodes of the instance.
5394 instance = self._migrater.instance
5395 source_node = instance.primary_node
5396 target_node = instance.secondary_nodes[0]
5397 env = _BuildInstanceHookEnvByObject(self, instance)
5398 env["MIGRATE_LIVE"] = self.op.live
5399 env["MIGRATE_CLEANUP"] = self.op.cleanup
5401 "OLD_PRIMARY": source_node,
5402 "OLD_SECONDARY": target_node,
5403 "NEW_PRIMARY": target_node,
5404 "NEW_SECONDARY": source_node,
5406 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5408 nl_post.append(source_node)
5409 return env, nl, nl_post
5412 class LUMoveInstance(LogicalUnit):
5413 """Move an instance by data-copying.
5416 HPATH = "instance-move"
5417 HTYPE = constants.HTYPE_INSTANCE
5419 ("instance_name", _TNonEmptyString),
5420 ("target_node", _TNonEmptyString),
5422 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5425 def ExpandNames(self):
5426 self._ExpandAndLockInstance()
5427 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5428 self.op.target_node = target_node
5429 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5430 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5432 def DeclareLocks(self, level):
5433 if level == locking.LEVEL_NODE:
5434 self._LockInstancesNodes(primary_only=True)
5436 def BuildHooksEnv(self):
5439 This runs on master, primary and secondary nodes of the instance.
5443 "TARGET_NODE": self.op.target_node,
5444 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5446 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5447 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5448 self.op.target_node]
5451 def CheckPrereq(self):
5452 """Check prerequisites.
5454 This checks that the instance is in the cluster.
5457 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5458 assert self.instance is not None, \
5459 "Cannot retrieve locked instance %s" % self.op.instance_name
5461 node = self.cfg.GetNodeInfo(self.op.target_node)
5462 assert node is not None, \
5463 "Cannot retrieve locked node %s" % self.op.target_node
5465 self.target_node = target_node = node.name
5467 if target_node == instance.primary_node:
5468 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5469 (instance.name, target_node),
5472 bep = self.cfg.GetClusterInfo().FillBE(instance)
5474 for idx, dsk in enumerate(instance.disks):
5475 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5476 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5477 " cannot copy" % idx, errors.ECODE_STATE)
5479 _CheckNodeOnline(self, target_node)
5480 _CheckNodeNotDrained(self, target_node)
5482 if instance.admin_up:
5483 # check memory requirements on the target node
5484 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5485 instance.name, bep[constants.BE_MEMORY],
5486 instance.hypervisor)
5488 self.LogInfo("Not checking memory on the target node as"
5489 " instance will not be started")
5491 # check bridge existence
5492 _CheckInstanceBridgesExist(self, instance, node=target_node)
5494 def Exec(self, feedback_fn):
5495 """Move an instance.
5497 The move is done by shutting it down on its present node, copying
5498 the data over (slow) and starting it on the new node.
5501 instance = self.instance
5503 source_node = instance.primary_node
5504 target_node = self.target_node
5506 self.LogInfo("Shutting down instance %s on source node %s",
5507 instance.name, source_node)
5509 result = self.rpc.call_instance_shutdown(source_node, instance,
5510 self.op.shutdown_timeout)
5511 msg = result.fail_msg
5513 if self.op.ignore_consistency:
5514 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5515 " Proceeding anyway. Please make sure node"
5516 " %s is down. Error details: %s",
5517 instance.name, source_node, source_node, msg)
5519 raise errors.OpExecError("Could not shutdown instance %s on"
5521 (instance.name, source_node, msg))
5523 # create the target disks
5525 _CreateDisks(self, instance, target_node=target_node)
5526 except errors.OpExecError:
5527 self.LogWarning("Device creation failed, reverting...")
5529 _RemoveDisks(self, instance, target_node=target_node)
5531 self.cfg.ReleaseDRBDMinors(instance.name)
5534 cluster_name = self.cfg.GetClusterInfo().cluster_name
5537 # activate, get path, copy the data over
5538 for idx, disk in enumerate(instance.disks):
5539 self.LogInfo("Copying data for disk %d", idx)
5540 result = self.rpc.call_blockdev_assemble(target_node, disk,
5541 instance.name, True)
5543 self.LogWarning("Can't assemble newly created disk %d: %s",
5544 idx, result.fail_msg)
5545 errs.append(result.fail_msg)
5547 dev_path = result.payload
5548 result = self.rpc.call_blockdev_export(source_node, disk,
5549 target_node, dev_path,
5552 self.LogWarning("Can't copy data over for disk %d: %s",
5553 idx, result.fail_msg)
5554 errs.append(result.fail_msg)
5558 self.LogWarning("Some disks failed to copy, aborting")
5560 _RemoveDisks(self, instance, target_node=target_node)
5562 self.cfg.ReleaseDRBDMinors(instance.name)
5563 raise errors.OpExecError("Errors during disk copy: %s" %
5566 instance.primary_node = target_node
5567 self.cfg.Update(instance, feedback_fn)
5569 self.LogInfo("Removing the disks on the original node")
5570 _RemoveDisks(self, instance, target_node=source_node)
5572 # Only start the instance if it's marked as up
5573 if instance.admin_up:
5574 self.LogInfo("Starting instance %s on node %s",
5575 instance.name, target_node)
5577 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5578 ignore_secondaries=True)
5580 _ShutdownInstanceDisks(self, instance)
5581 raise errors.OpExecError("Can't activate the instance's disks")
5583 result = self.rpc.call_instance_start(target_node, instance, None, None)
5584 msg = result.fail_msg
5586 _ShutdownInstanceDisks(self, instance)
5587 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5588 (instance.name, target_node, msg))
5591 class LUMigrateNode(LogicalUnit):
5592 """Migrate all instances from a node.
5595 HPATH = "node-migrate"
5596 HTYPE = constants.HTYPE_NODE
5598 ("node_name", _TNonEmptyString),
5603 def ExpandNames(self):
5604 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5606 self.needed_locks = {
5607 locking.LEVEL_NODE: [self.op.node_name],
5610 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5612 # Create tasklets for migrating instances for all instances on this node
5616 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5617 logging.debug("Migrating instance %s", inst.name)
5618 names.append(inst.name)
5620 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5622 self.tasklets = tasklets
5624 # Declare instance locks
5625 self.needed_locks[locking.LEVEL_INSTANCE] = names
5627 def DeclareLocks(self, level):
5628 if level == locking.LEVEL_NODE:
5629 self._LockInstancesNodes()
5631 def BuildHooksEnv(self):
5634 This runs on the master, the primary and all the secondaries.
5638 "NODE_NAME": self.op.node_name,
5641 nl = [self.cfg.GetMasterNode()]
5643 return (env, nl, nl)
5646 class TLMigrateInstance(Tasklet):
5647 def __init__(self, lu, instance_name, live, cleanup):
5648 """Initializes this class.
5651 Tasklet.__init__(self, lu)
5654 self.instance_name = instance_name
5656 self.cleanup = cleanup
5658 def CheckPrereq(self):
5659 """Check prerequisites.
5661 This checks that the instance is in the cluster.
5664 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5665 instance = self.cfg.GetInstanceInfo(instance_name)
5666 assert instance is not None
5668 if instance.disk_template != constants.DT_DRBD8:
5669 raise errors.OpPrereqError("Instance's disk layout is not"
5670 " drbd8, cannot migrate.", errors.ECODE_STATE)
5672 secondary_nodes = instance.secondary_nodes
5673 if not secondary_nodes:
5674 raise errors.ConfigurationError("No secondary node but using"
5675 " drbd8 disk template")
5677 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5679 target_node = secondary_nodes[0]
5680 # check memory requirements on the secondary node
5681 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5682 instance.name, i_be[constants.BE_MEMORY],
5683 instance.hypervisor)
5685 # check bridge existence
5686 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5688 if not self.cleanup:
5689 _CheckNodeNotDrained(self.lu, target_node)
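# ask the source node whether this instance can be live-migrated at all;
# if not, the error below suggests using failover instead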
5690 result = self.rpc.call_instance_migratable(instance.primary_node,
5692 result.Raise("Can't migrate, please use failover",
5693 prereq=True, ecode=errors.ECODE_STATE)
5695 self.instance = instance
5697 def _WaitUntilSync(self):
5698 """Poll with custom rpc for disk sync.
5700 This uses our own step-based rpc call.
5703 self.feedback_fn("* wait until resync is done")
5707 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5709 self.instance.disks)
5711 for node, nres in result.items():
5712 nres.Raise("Cannot resync disks on node %s" % node)
5713 node_done, node_percent = nres.payload
5714 all_done = all_done and node_done
5715 if node_percent is not None:
5716 min_percent = min(min_percent, node_percent)
5718 if min_percent < 100:
5719 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5722 def _EnsureSecondary(self, node):
5723 """Demote a node to secondary.
5726 self.feedback_fn("* switching node %s to secondary mode" % node)
5728 for dev in self.instance.disks:
5729 self.cfg.SetDiskID(dev, node)
5731 result = self.rpc.call_blockdev_close(node, self.instance.name,
5732 self.instance.disks)
5733 result.Raise("Cannot change disk to secondary on node %s" % node)
5735 def _GoStandalone(self):
5736 """Disconnect from the network.
5739 self.feedback_fn("* changing into standalone mode")
5740 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5741 self.instance.disks)
5742 for node, nres in result.items():
5743 nres.Raise("Cannot disconnect disks node %s" % node)
5745 def _GoReconnect(self, multimaster):
5746 """Reconnect to the network.
5752 msg = "single-master"
5753 self.feedback_fn("* changing disks into %s mode" % msg)
5754 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5755 self.instance.disks,
5756 self.instance.name, multimaster)
5757 for node, nres in result.items():
5758 nres.Raise("Cannot change disks config on node %s" % node)
5760 def _ExecCleanup(self):
5761 """Try to cleanup after a failed migration.
5763 The cleanup is done by:
5764 - check that the instance is running only on one node
5765 (and update the config if needed)
5766 - change disks on its secondary node to secondary
5767 - wait until disks are fully synchronized
5768 - disconnect from the network
5769 - change disks into single-master mode
5770 - wait again until disks are fully synchronized
5773 instance = self.instance
5774 target_node = self.target_node
5775 source_node = self.source_node
5777 # check running on only one node
5778 self.feedback_fn("* checking where the instance actually runs"
5779 " (if this hangs, the hypervisor might be in"
5781 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5782 for node, result in ins_l.items():
5783 result.Raise("Can't contact node %s" % node)
5785 runningon_source = instance.name in ins_l[source_node].payload
5786 runningon_target = instance.name in ins_l[target_node].payload
5788 if runningon_source and runningon_target:
5789 raise errors.OpExecError("Instance seems to be running on two nodes,"
5790 " or the hypervisor is confused. You will have"
5791 " to ensure manually that it runs only on one"
5792 " and restart this operation.")
5794 if not (runningon_source or runningon_target):
5795 raise errors.OpExecError("Instance does not seem to be running at all."
5796 " In this case, it's safer to repair by"
5797 " running 'gnt-instance stop' to ensure disk"
5798 " shutdown, and then restarting it.")
5800 if runningon_target:
5801 # the migration has actually succeeded, we need to update the config
5802 self.feedback_fn("* instance running on secondary node (%s),"
5803 " updating config" % target_node)
5804 instance.primary_node = target_node
5805 self.cfg.Update(instance, self.feedback_fn)
5806 demoted_node = source_node
5808 self.feedback_fn("* instance confirmed to be running on its"
5809 " primary node (%s)" % source_node)
5810 demoted_node = target_node
5812 self._EnsureSecondary(demoted_node)
5814 self._WaitUntilSync()
5815 except errors.OpExecError:
5816 # we ignore errors here, since if the device is standalone, it
5817 # won't be able to sync
5819 self._GoStandalone()
5820 self._GoReconnect(False)
5821 self._WaitUntilSync()
5823 self.feedback_fn("* done")
5825 def _RevertDiskStatus(self):
5826 """Try to revert the disk status after a failed migration.
5829 target_node = self.target_node
5831 self._EnsureSecondary(target_node)
5832 self._GoStandalone()
5833 self._GoReconnect(False)
5834 self._WaitUntilSync()
5835 except errors.OpExecError, err:
5836 self.lu.LogWarning("Migration failed and I can't reconnect the"
5837 " drives: error '%s'\n"
5838 "Please look and recover the instance status" %
5841 def _AbortMigration(self):
5842 """Call the hypervisor code to abort a started migration.
5845 instance = self.instance
5846 target_node = self.target_node
5847 migration_info = self.migration_info
5849 abort_result = self.rpc.call_finalize_migration(target_node,
5853 abort_msg = abort_result.fail_msg
5855 logging.error("Aborting migration failed on target node %s: %s",
5856 target_node, abort_msg)
5857 # Don't raise an exception here, as we still have to try to revert the
5858 # disk status, even if this step failed.
5860 def _ExecMigration(self):
5861 """Migrate an instance.
5863 The migration is done by:
5864 - change the disks into dual-master mode
5865 - wait until disks are fully synchronized again
5866 - migrate the instance
5867 - change disks on the new secondary node (the old primary) to secondary
5868 - wait until disks are fully synchronized
5869 - change disks into single-master mode
5872 instance = self.instance
5873 target_node = self.target_node
5874 source_node = self.source_node
5876 self.feedback_fn("* checking disk consistency between source and target")
5877 for dev in instance.disks:
5878 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5879 raise errors.OpExecError("Disk %s is degraded or not fully"
5880 " synchronized on target node,"
5881 " aborting migrate." % dev.iv_name)
5883 # First get the migration information from the remote node
5884 result = self.rpc.call_migration_info(source_node, instance)
5885 msg = result.fail_msg
5887 log_err = ("Failed fetching source migration information from %s: %s" %
5889 logging.error(log_err)
5890 raise errors.OpExecError(log_err)
5892 self.migration_info = migration_info = result.payload
5894 # Then switch the disks to master/master mode
5895 self._EnsureSecondary(target_node)
5896 self._GoStandalone()
5897 self._GoReconnect(True)
5898 self._WaitUntilSync()
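# The disks are now connected in dual-primary (master/master) mode and in
# sync, which is what the hypervisor needs to perform the live migration.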
5900 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5901 result = self.rpc.call_accept_instance(target_node,
5904 self.nodes_ip[target_node])
5906 msg = result.fail_msg
5908 logging.error("Instance pre-migration failed, trying to revert"
5909 " disk status: %s", msg)
5910 self.feedback_fn("Pre-migration failed, aborting")
5911 self._AbortMigration()
5912 self._RevertDiskStatus()
5913 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5914 (instance.name, msg))
5916 self.feedback_fn("* migrating instance to %s" % target_node)
5918 result = self.rpc.call_instance_migrate(source_node, instance,
5919 self.nodes_ip[target_node],
5921 msg = result.fail_msg
5923 logging.error("Instance migration failed, trying to revert"
5924 " disk status: %s", msg)
5925 self.feedback_fn("Migration failed, aborting")
5926 self._AbortMigration()
5927 self._RevertDiskStatus()
5928 raise errors.OpExecError("Could not migrate instance %s: %s" %
5929 (instance.name, msg))
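# The hypervisor-level migration succeeded; record the new primary node
# before distributing the updated configuration below.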
5932 instance.primary_node = target_node
5933 # distribute new instance config to the other nodes
5934 self.cfg.Update(instance, self.feedback_fn)
5936 result = self.rpc.call_finalize_migration(target_node,
5940 msg = result.fail_msg
5942 logging.error("Instance migration succeeded, but finalization failed: %s", msg)
5944 raise errors.OpExecError("Could not finalize instance migration: %s" % msg)
5947 self._EnsureSecondary(source_node)
5948 self._WaitUntilSync()
5949 self._GoStandalone()
5950 self._GoReconnect(False)
5951 self._WaitUntilSync()
5953 self.feedback_fn("* done")
5955 def Exec(self, feedback_fn):
5956 """Perform the migration.
5959 feedback_fn("Migrating instance %s" % self.instance.name)
5961 self.feedback_fn = feedback_fn
5963 self.source_node = self.instance.primary_node
5964 self.target_node = self.instance.secondary_nodes[0]
5965 self.all_nodes = [self.source_node, self.target_node]
5967 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5968 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5972 return self._ExecCleanup()
5974 return self._ExecMigration()
5977 def _CreateBlockDev(lu, node, instance, device, force_create,
5979 """Create a tree of block devices on a given node.
5981 If this device type has to be created on secondaries, create it and all its children.
5984 If not, just recurse to children keeping the same 'force' value.
5986 @param lu: the lu on whose behalf we execute
5987 @param node: the node on which to create the device
5988 @type instance: L{objects.Instance}
5989 @param instance: the instance which owns the device
5990 @type device: L{objects.Disk}
5991 @param device: the device to create
5992 @type force_create: boolean
5993 @param force_create: whether to force creation of this device; this
5994 will be changed to True whenever we find a device which has the
5995 CreateOnSecondary() attribute
5996 @param info: the extra 'metadata' we should attach to the device
5997 (this will be represented as a LVM tag)
5998 @type force_open: boolean
5999 @param force_open: this parameter will be passed to the
6000 L{backend.BlockdevCreate} function where it specifies
6001 whether we run on primary or not, and it affects both
6002 the child assembly and the device's own Open() execution
6005 if device.CreateOnSecondary():
6009 for child in device.children:
6010 _CreateBlockDev(lu, node, instance, child, force_create,
6013 if not force_create:
6016 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6019 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6020 """Create a single block device on a given node.
6022 This will not recurse over children of the device, so they must be created in advance.
6025 @param lu: the lu on whose behalf we execute
6026 @param node: the node on which to create the device
6027 @type instance: L{objects.Instance}
6028 @param instance: the instance which owns the device
6029 @type device: L{objects.Disk}
6030 @param device: the device to create
6031 @param info: the extra 'metadata' we should attach to the device
6032 (this will be represented as a LVM tag)
6033 @type force_open: boolean
6034 @param force_open: this parameter will be passed to the
6035 L{backend.BlockdevCreate} function where it specifies
6036 whether we run on primary or not, and it affects both
6037 the child assembly and the device's own Open() execution
6040 lu.cfg.SetDiskID(device, node)
6041 result = lu.rpc.call_blockdev_create(node, device, device.size,
6042 instance.name, force_open, info)
6043 result.Raise("Can't create block device %s on"
6044 " node %s for instance %s" % (device, node, instance.name))
6045 if device.physical_id is None:
6046 device.physical_id = result.payload
6049 def _GenerateUniqueNames(lu, exts):
6050 """Generate a suitable LV name.
6052 This will generate a logical volume name for the given instance.
6057 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6058 results.append("%s%s" % (new_id, val))
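# Each generated name is the freshly allocated unique id with the extension
# appended, e.g. "<uuid>.disk0" or "<uuid>.disk0_data" depending on the
# caller (illustrative examples, not literal values).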
6062 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6064 """Generate a drbd8 device complete with its children.
6067 port = lu.cfg.AllocatePort()
6068 vgname = lu.cfg.GetVGName()
6069 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6070 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6071 logical_id=(vgname, names[0]))
6072 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6073 logical_id=(vgname, names[1]))
6074 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6075 logical_id=(primary, secondary, port,
6078 children=[dev_data, dev_meta],
6083 def _GenerateDiskTemplate(lu, template_name,
6084 instance_name, primary_node,
6085 secondary_nodes, disk_info,
6086 file_storage_dir, file_driver,
6088 """Generate the entire disk layout for a given template type.
6091 #TODO: compute space requirements
6093 vgname = lu.cfg.GetVGName()
6094 disk_count = len(disk_info)
6096 if template_name == constants.DT_DISKLESS:
6098 elif template_name == constants.DT_PLAIN:
6099 if len(secondary_nodes) != 0:
6100 raise errors.ProgrammerError("Wrong template configuration")
6102 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6103 for i in range(disk_count)])
6104 for idx, disk in enumerate(disk_info):
6105 disk_index = idx + base_index
6106 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6107 logical_id=(vgname, names[idx]),
6108 iv_name="disk/%d" % disk_index,
6110 disks.append(disk_dev)
6111 elif template_name == constants.DT_DRBD8:
6112 if len(secondary_nodes) != 1:
6113 raise errors.ProgrammerError("Wrong template configuration")
6114 remote_node = secondary_nodes[0]
6115 minors = lu.cfg.AllocateDRBDMinor(
6116 [primary_node, remote_node] * len(disk_info), instance_name)
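# The minors come back interleaved per disk, [pri0, sec0, pri1, sec1, ...],
# matching the [primary_node, remote_node] * len(disk_info) request above;
# this is why the loop below indexes minors[idx*2] and minors[idx*2+1].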
6119 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6120 for i in range(disk_count)]):
6121 names.append(lv_prefix + "_data")
6122 names.append(lv_prefix + "_meta")
6123 for idx, disk in enumerate(disk_info):
6124 disk_index = idx + base_index
6125 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6126 disk["size"], names[idx*2:idx*2+2],
6127 "disk/%d" % disk_index,
6128 minors[idx*2], minors[idx*2+1])
6129 disk_dev.mode = disk["mode"]
6130 disks.append(disk_dev)
6131 elif template_name == constants.DT_FILE:
6132 if len(secondary_nodes) != 0:
6133 raise errors.ProgrammerError("Wrong template configuration")
6135 _RequireFileStorage()
6137 for idx, disk in enumerate(disk_info):
6138 disk_index = idx + base_index
6139 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6140 iv_name="disk/%d" % disk_index,
6141 logical_id=(file_driver,
6142 "%s/disk%d" % (file_storage_dir,
6145 disks.append(disk_dev)
6147 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6151 def _GetInstanceInfoText(instance):
6152 """Compute the text that should be added to the disk's metadata.
6155 return "originstname+%s" % instance.name
6158 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6159 """Create all disks for an instance.
6161 This abstracts away some work from AddInstance.
6163 @type lu: L{LogicalUnit}
6164 @param lu: the logical unit on whose behalf we execute
6165 @type instance: L{objects.Instance}
6166 @param instance: the instance whose disks we should create
6168 @param to_skip: list of indices to skip
6169 @type target_node: string
6170 @param target_node: if passed, overrides the target node for creation
6172 @return: the success of the creation
6175 info = _GetInstanceInfoText(instance)
6176 if target_node is None:
6177 pnode = instance.primary_node
6178 all_nodes = instance.all_nodes
6183 if instance.disk_template == constants.DT_FILE:
6184 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6185 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6187 result.Raise("Failed to create directory '%s' on"
6188 " node %s" % (file_storage_dir, pnode))
6190 # Note: this needs to be kept in sync with adding of disks in
6191 # LUSetInstanceParams
6192 for idx, device in enumerate(instance.disks):
6193 if to_skip and idx in to_skip:
6195 logging.info("Creating volume %s for instance %s",
6196 device.iv_name, instance.name)
6198 for node in all_nodes:
6199 f_create = node == pnode
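# f_create is True only on the primary node; it is passed both as
# force_create and as force_open in the call below.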
6200 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6203 def _RemoveDisks(lu, instance, target_node=None):
6204 """Remove all disks for an instance.
6206 This abstracts away some work from `AddInstance()` and
6207 `RemoveInstance()`. Note that in case some of the devices couldn't
6208 be removed, the removal will continue with the other ones (compare
6209 with `_CreateDisks()`).
6211 @type lu: L{LogicalUnit}
6212 @param lu: the logical unit on whose behalf we execute
6213 @type instance: L{objects.Instance}
6214 @param instance: the instance whose disks we should remove
6215 @type target_node: string
6216 @param target_node: used to override the node on which to remove the disks
6218 @return: the success of the removal
6221 logging.info("Removing block devices for instance %s", instance.name)
6224 for device in instance.disks:
6226 edata = [(target_node, device)]
6228 edata = device.ComputeNodeTree(instance.primary_node)
6229 for node, disk in edata:
6230 lu.cfg.SetDiskID(disk, node)
6231 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6233 lu.LogWarning("Could not remove block device %s on node %s,"
6234 " continuing anyway: %s", device.iv_name, node, msg)
6237 if instance.disk_template == constants.DT_FILE:
6238 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6242 tgt = instance.primary_node
6243 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6245 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6246 file_storage_dir, instance.primary_node, result.fail_msg)
6252 def _ComputeDiskSize(disk_template, disks):
6253 """Compute disk size requirements in the volume group
6256 # Required free disk space as a function of disk and swap space
6258 constants.DT_DISKLESS: None,
6259 constants.DT_PLAIN: sum(d["size"] for d in disks),
6260 # 128 MB are added for drbd metadata for each disk
6261 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6262 constants.DT_FILE: None,
6265 if disk_template not in req_size_dict:
6266 raise errors.ProgrammerError("Disk template '%s' size requirement"
6267 " is unknown" % disk_template)
6269 return req_size_dict[disk_template]
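# Illustrative example (not part of the original code): for two DRBD8 disks
# of 10240 MB and 2048 MB, the required space is
# (10240 + 128) + (2048 + 128) = 12544 MB in the volume group, while
# diskless and file-based templates return None (no VG space needed).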
6272 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6273 """Hypervisor parameter validation.
6275 This function abstracts the hypervisor parameter validation to be
6276 used in both instance create and instance modify.
6278 @type lu: L{LogicalUnit}
6279 @param lu: the logical unit for which we check
6280 @type nodenames: list
6281 @param nodenames: the list of nodes on which we should check
6282 @type hvname: string
6283 @param hvname: the name of the hypervisor we should use
6284 @type hvparams: dict
6285 @param hvparams: the parameters which we need to check
6286 @raise errors.OpPrereqError: if the parameters are not valid
6289 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6292 for node in nodenames:
6296 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6299 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6300 """OS parameters validation.
6302 @type lu: L{LogicalUnit}
6303 @param lu: the logical unit for which we check
6304 @type required: boolean
6305 @param required: whether the validation should fail if the OS is not found
6307 @type nodenames: list
6308 @param nodenames: the list of nodes on which we should check
6309 @type osname: string
6310 @param osname: the name of the OS we should use
6311 @type osparams: dict
6312 @param osparams: the parameters which we need to check
6313 @raise errors.OpPrereqError: if the parameters are not valid
6316 result = lu.rpc.call_os_validate(required, nodenames, osname,
6317 [constants.OS_VALIDATE_PARAMETERS],
6319 for node, nres in result.items():
6320 # we don't check for offline cases since this should be run only
6321 # against the master node and/or an instance's nodes
6322 nres.Raise("OS Parameters validation failed on node %s" % node)
6323 if not nres.payload:
6324 lu.LogInfo("OS %s not found on node %s, validation skipped", osname, node)
6328 class LUCreateInstance(LogicalUnit):
6329 """Create an instance.
6332 HPATH = "instance-add"
6333 HTYPE = constants.HTYPE_INSTANCE
6335 ("instance_name", _TNonEmptyString),
6336 ("mode", _TElemOf(constants.INSTANCE_CREATE_MODES)),
6338 ("wait_for_sync", _TBool),
6339 ("ip_check", _TBool),
6340 ("disks", _TListOf(_TDict)),
6341 ("nics", _TListOf(_TDict)),
6342 ("hvparams", _TDict),
6343 ("beparams", _TDict),
6344 ("osparams", _TDict),
6347 ("name_check", True),
6348 ("no_install", False),
6350 ("force_variant", False),
6351 ("source_handshake", None),
6352 ("source_x509_ca", None),
6353 ("source_instance_name", None),
6358 ("iallocator", None),
6359 ("hypervisor", None),
6360 ("disk_template", None),
6361 ("identify_defaults", None),
6365 def CheckArguments(self):
6369 # do not require name_check to ease forward/backward compatibility
6371 if self.op.no_install and self.op.start:
6372 self.LogInfo("No-installation mode selected, disabling startup")
6373 self.op.start = False
6374 # validate/normalize the instance name
6375 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6376 if self.op.ip_check and not self.op.name_check:
6377 # TODO: make the ip check more flexible and not depend on the name check
6378 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6381 # check nics' parameter names
6382 for nic in self.op.nics:
6383 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6385 # check disks. parameter names and consistent adopt/no-adopt strategy
6386 has_adopt = has_no_adopt = False
6387 for disk in self.op.disks:
6388 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6393 if has_adopt and has_no_adopt:
6394 raise errors.OpPrereqError("Either all disks are adopted or none is",
6397 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6398 raise errors.OpPrereqError("Disk adoption is not supported for the"
6399 " '%s' disk template" %
6400 self.op.disk_template,
6402 if self.op.iallocator is not None:
6403 raise errors.OpPrereqError("Disk adoption not allowed with an"
6404 " iallocator script", errors.ECODE_INVAL)
6405 if self.op.mode == constants.INSTANCE_IMPORT:
6406 raise errors.OpPrereqError("Disk adoption not allowed for"
6407 " instance import", errors.ECODE_INVAL)
6409 self.adopt_disks = has_adopt
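# From here on the opcode either adopts all of its disks or none of them
# (enforced by the check above).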
6411 # instance name verification
6412 if self.op.name_check:
6413 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6414 self.op.instance_name = self.hostname1.name
6415 # used in CheckPrereq for ip ping check
6416 self.check_ip = self.hostname1.ip
6417 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6418 raise errors.OpPrereqError("Remote imports require names to be checked",
6421 self.check_ip = None
6423 # file storage checks
6424 if (self.op.file_driver and
6425 not self.op.file_driver in constants.FILE_DRIVER):
6426 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6427 self.op.file_driver, errors.ECODE_INVAL)
6429 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6430 raise errors.OpPrereqError("File storage directory path not absolute",
6433 ### Node/iallocator related checks
6434 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6435 raise errors.OpPrereqError("One and only one of iallocator and primary"
6436 " node must be given",
6439 self._cds = _GetClusterDomainSecret()
6441 if self.op.mode == constants.INSTANCE_IMPORT:
6442 # On import force_variant must be True, because if we forced it at
6443 # initial install, our only chance when importing it back is that it still works
6445 self.op.force_variant = True
6447 if self.op.no_install:
6448 self.LogInfo("No-installation mode has no effect during import")
6450 elif self.op.mode == constants.INSTANCE_CREATE:
6451 if self.op.os_type is None:
6452 raise errors.OpPrereqError("No guest OS specified",
6454 if self.op.disk_template is None:
6455 raise errors.OpPrereqError("No disk template specified",
6458 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6459 # Check handshake to ensure both clusters have the same domain secret
6460 src_handshake = self.op.source_handshake
6461 if not src_handshake:
6462 raise errors.OpPrereqError("Missing source handshake",
6465 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6468 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6471 # Load and check source CA
6472 self.source_x509_ca_pem = self.op.source_x509_ca
6473 if not self.source_x509_ca_pem:
6474 raise errors.OpPrereqError("Missing source X509 CA",
6478 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6480 except OpenSSL.crypto.Error, err:
6481 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6482 (err, ), errors.ECODE_INVAL)
6484 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6485 if errcode is not None:
6486 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6489 self.source_x509_ca = cert
6491 src_instance_name = self.op.source_instance_name
6492 if not src_instance_name:
6493 raise errors.OpPrereqError("Missing source instance name",
6496 self.source_instance_name = \
6497 utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6500 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6501 self.op.mode, errors.ECODE_INVAL)
6503 def ExpandNames(self):
6504 """ExpandNames for CreateInstance.
6506 Figure out the right locks for instance creation.
6509 self.needed_locks = {}
6511 instance_name = self.op.instance_name
6512 # this is just a preventive check, but someone might still add this
6513 # instance in the meantime, and creation will fail at lock-add time
6514 if instance_name in self.cfg.GetInstanceList():
6515 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6516 instance_name, errors.ECODE_EXISTS)
6518 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6520 if self.op.iallocator:
6521 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6523 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6524 nodelist = [self.op.pnode]
6525 if self.op.snode is not None:
6526 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6527 nodelist.append(self.op.snode)
6528 self.needed_locks[locking.LEVEL_NODE] = nodelist
6530 # in case of import lock the source node too
6531 if self.op.mode == constants.INSTANCE_IMPORT:
6532 src_node = self.op.src_node
6533 src_path = self.op.src_path
6535 if src_path is None:
6536 self.op.src_path = src_path = self.op.instance_name
6538 if src_node is None:
6539 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6540 self.op.src_node = None
6541 if os.path.isabs(src_path):
6542 raise errors.OpPrereqError("Importing an instance from an absolute"
6543 " path requires a source node option.",
6546 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6547 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6548 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6549 if not os.path.isabs(src_path):
6550 self.op.src_path = src_path = \
6551 utils.PathJoin(constants.EXPORT_DIR, src_path)
6553 def _RunAllocator(self):
6554 """Run the allocator based on input opcode.
6557 nics = [n.ToDict() for n in self.nics]
6558 ial = IAllocator(self.cfg, self.rpc,
6559 mode=constants.IALLOCATOR_MODE_ALLOC,
6560 name=self.op.instance_name,
6561 disk_template=self.op.disk_template,
6564 vcpus=self.be_full[constants.BE_VCPUS],
6565 mem_size=self.be_full[constants.BE_MEMORY],
6568 hypervisor=self.op.hypervisor,
6571 ial.Run(self.op.iallocator)
6574 raise errors.OpPrereqError("Can't compute nodes using"
6575 " iallocator '%s': %s" %
6576 (self.op.iallocator, ial.info),
6578 if len(ial.result) != ial.required_nodes:
6579 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6580 " of nodes (%s), required %s" %
6581 (self.op.iallocator, len(ial.result),
6582 ial.required_nodes), errors.ECODE_FAULT)
6583 self.op.pnode = ial.result[0]
6584 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6585 self.op.instance_name, self.op.iallocator,
6586 utils.CommaJoin(ial.result))
6587 if ial.required_nodes == 2:
6588 self.op.snode = ial.result[1]
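# For templates that need two nodes the allocator's second result becomes
# the secondary node of the new instance.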
6590 def BuildHooksEnv(self):
6593 This runs on master, primary and secondary nodes of the instance.
6597 "ADD_MODE": self.op.mode,
6599 if self.op.mode == constants.INSTANCE_IMPORT:
6600 env["SRC_NODE"] = self.op.src_node
6601 env["SRC_PATH"] = self.op.src_path
6602 env["SRC_IMAGES"] = self.src_images
6604 env.update(_BuildInstanceHookEnv(
6605 name=self.op.instance_name,
6606 primary_node=self.op.pnode,
6607 secondary_nodes=self.secondaries,
6608 status=self.op.start,
6609 os_type=self.op.os_type,
6610 memory=self.be_full[constants.BE_MEMORY],
6611 vcpus=self.be_full[constants.BE_VCPUS],
6612 nics=_NICListToTuple(self, self.nics),
6613 disk_template=self.op.disk_template,
6614 disks=[(d["size"], d["mode"]) for d in self.disks],
6617 hypervisor_name=self.op.hypervisor,
6620 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6624 def _ReadExportInfo(self):
6625 """Reads the export information from disk.
6627 It will override the opcode source node and path with the actual
6628 information, if these two were not specified before.
6630 @return: the export information
6633 assert self.op.mode == constants.INSTANCE_IMPORT
6635 src_node = self.op.src_node
6636 src_path = self.op.src_path
6638 if src_node is None:
6639 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6640 exp_list = self.rpc.call_export_list(locked_nodes)
6642 for node in exp_list:
6643 if exp_list[node].fail_msg:
6645 if src_path in exp_list[node].payload:
6647 self.op.src_node = src_node = node
6648 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6652 raise errors.OpPrereqError("No export found for relative path %s" %
6653 src_path, errors.ECODE_INVAL)
6655 _CheckNodeOnline(self, src_node)
6656 result = self.rpc.call_export_info(src_node, src_path)
6657 result.Raise("No export or invalid export found in dir %s" % src_path)
6659 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6660 if not export_info.has_section(constants.INISECT_EXP):
6661 raise errors.ProgrammerError("Corrupted export config",
6662 errors.ECODE_ENVIRON)
6664 ei_version = export_info.get(constants.INISECT_EXP, "version")
6665 if (int(ei_version) != constants.EXPORT_VERSION):
6666 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6667 (ei_version, constants.EXPORT_VERSION),
6668 errors.ECODE_ENVIRON)
6671 def _ReadExportParams(self, einfo):
6672 """Use export parameters as defaults.
6674 In case the opcode doesn't specify (as in override) some instance
6675 parameters, then try to use them from the export information, if that declares them.
6679 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6681 if self.op.disk_template is None:
6682 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6683 self.op.disk_template = einfo.get(constants.INISECT_INS,
6686 raise errors.OpPrereqError("No disk template specified and the export"
6687 " is missing the disk_template information",
6690 if not self.op.disks:
6691 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6693 # TODO: import the disk iv_name too
6694 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6695 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6696 disks.append({"size": disk_sz})
6697 self.op.disks = disks
6699 raise errors.OpPrereqError("No disk info specified and the export"
6700 " is missing the disk information",
6703 if (not self.op.nics and
6704 einfo.has_option(constants.INISECT_INS, "nic_count")):
6706 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6708 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6709 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6714 if (self.op.hypervisor is None and
6715 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6716 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6717 if einfo.has_section(constants.INISECT_HYP):
6718 # use the export parameters but do not override the ones
6719 # specified by the user
6720 for name, value in einfo.items(constants.INISECT_HYP):
6721 if name not in self.op.hvparams:
6722 self.op.hvparams[name] = value
6724 if einfo.has_section(constants.INISECT_BEP):
6725 # use the parameters, without overriding
6726 for name, value in einfo.items(constants.INISECT_BEP):
6727 if name not in self.op.beparams:
6728 self.op.beparams[name] = value
6730 # try to read the parameters old style, from the main section
6731 for name in constants.BES_PARAMETERS:
6732 if (name not in self.op.beparams and
6733 einfo.has_option(constants.INISECT_INS, name)):
6734 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6736 if einfo.has_section(constants.INISECT_OSP):
6737 # use the parameters, without overriding
6738 for name, value in einfo.items(constants.INISECT_OSP):
6739 if name not in self.op.osparams:
6740 self.op.osparams[name] = value
6742 def _RevertToDefaults(self, cluster):
6743 """Revert the instance parameters to the default values.
6747 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6748 for name in self.op.hvparams.keys():
6749 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6750 del self.op.hvparams[name]
6752 be_defs = cluster.SimpleFillBE({})
6753 for name in self.op.beparams.keys():
6754 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6755 del self.op.beparams[name]
6757 nic_defs = cluster.SimpleFillNIC({})
6758 for nic in self.op.nics:
6759 for name in constants.NICS_PARAMETERS:
6760 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6763 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6764 for name in self.op.osparams.keys():
6765 if name in os_defs and os_defs[name] == self.op.osparams[name]:
6766 del self.op.osparams[name]
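# Note: only values that exactly match the cluster defaults are dropped;
# explicitly requested non-default values stay in the opcode.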
6768 def CheckPrereq(self):
6769 """Check prerequisites.
6772 if self.op.mode == constants.INSTANCE_IMPORT:
6773 export_info = self._ReadExportInfo()
6774 self._ReadExportParams(export_info)
6776 _CheckDiskTemplate(self.op.disk_template)
6778 if (not self.cfg.GetVGName() and
6779 self.op.disk_template not in constants.DTS_NOT_LVM):
6780 raise errors.OpPrereqError("Cluster does not support lvm-based"
6781 " instances", errors.ECODE_STATE)
6783 if self.op.hypervisor is None:
6784 self.op.hypervisor = self.cfg.GetHypervisorType()
6786 cluster = self.cfg.GetClusterInfo()
6787 enabled_hvs = cluster.enabled_hypervisors
6788 if self.op.hypervisor not in enabled_hvs:
6789 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6790 " cluster (%s)" % (self.op.hypervisor,
6791 ",".join(enabled_hvs)),
6794 # check hypervisor parameter syntax (locally)
6795 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6796 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6798 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6799 hv_type.CheckParameterSyntax(filled_hvp)
6800 self.hv_full = filled_hvp
6801 # check that we don't specify global parameters on an instance
6802 _CheckGlobalHvParams(self.op.hvparams)
6804 # fill and remember the beparams dict
6805 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6806 self.be_full = cluster.SimpleFillBE(self.op.beparams)
6808 # build os parameters
6809 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6811 # now that hvp/bep are in final format, let's reset to defaults,
6813 if self.op.identify_defaults:
6814 self._RevertToDefaults(cluster)
6818 for idx, nic in enumerate(self.op.nics):
6819 nic_mode_req = nic.get("mode", None)
6820 nic_mode = nic_mode_req
6821 if nic_mode is None:
6822 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6824 # in routed mode, for the first nic, the default ip is 'auto'
6825 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6826 default_ip_mode = constants.VALUE_AUTO
6828 default_ip_mode = constants.VALUE_NONE
6830 # ip validity checks
6831 ip = nic.get("ip", default_ip_mode)
6832 if ip is None or ip.lower() == constants.VALUE_NONE:
6834 elif ip.lower() == constants.VALUE_AUTO:
6835 if not self.op.name_check:
6836 raise errors.OpPrereqError("IP address set to auto but name checks"
6837 " have been skipped. Aborting.",
6839 nic_ip = self.hostname1.ip
6841 if not utils.IsValidIP(ip):
6842 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6843 " like a valid IP" % ip,
6847 # TODO: check the ip address for uniqueness
6848 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6849 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6852 # MAC address verification
6853 mac = nic.get("mac", constants.VALUE_AUTO)
6854 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6855 mac = utils.NormalizeAndValidateMac(mac)
6858 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6859 except errors.ReservationError:
6860 raise errors.OpPrereqError("MAC address %s already in use"
6861 " in cluster" % mac,
6862 errors.ECODE_NOTUNIQUE)
6864 # bridge verification
6865 bridge = nic.get("bridge", None)
6866 link = nic.get("link", None)
6868 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6869 " at the same time", errors.ECODE_INVAL)
6870 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6871 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6878 nicparams[constants.NIC_MODE] = nic_mode_req
6880 nicparams[constants.NIC_LINK] = link
6882 check_params = cluster.SimpleFillNIC(nicparams)
6883 objects.NIC.CheckParameterSyntax(check_params)
6884 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6886 # disk checks/pre-build
6888 for disk in self.op.disks:
6889 mode = disk.get("mode", constants.DISK_RDWR)
6890 if mode not in constants.DISK_ACCESS_SET:
6891 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6892 mode, errors.ECODE_INVAL)
6893 size = disk.get("size", None)
6895 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6898 except (TypeError, ValueError):
6899 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6901 new_disk = {"size": size, "mode": mode}
6903 new_disk["adopt"] = disk["adopt"]
6904 self.disks.append(new_disk)
6906 if self.op.mode == constants.INSTANCE_IMPORT:
6908 # Check that the new instance doesn't have fewer disks than the export
6909 instance_disks = len(self.disks)
6910 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6911 if instance_disks < export_disks:
6912 raise errors.OpPrereqError("Not enough disks to import."
6913 " (instance: %d, export: %d)" %
6914 (instance_disks, export_disks),
6918 for idx in range(export_disks):
6919 option = 'disk%d_dump' % idx
6920 if export_info.has_option(constants.INISECT_INS, option):
6921 # FIXME: are the old os-es, disk sizes, etc. useful?
6922 export_name = export_info.get(constants.INISECT_INS, option)
6923 image = utils.PathJoin(self.op.src_path, export_name)
6924 disk_images.append(image)
6926 disk_images.append(False)
6928 self.src_images = disk_images
6930 old_name = export_info.get(constants.INISECT_INS, 'name')
6932 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6933 except (TypeError, ValueError), err:
6934 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6935 " an integer: %s" % str(err),
6937 if self.op.instance_name == old_name:
6938 for idx, nic in enumerate(self.nics):
6939 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6940 nic_mac_ini = 'nic%d_mac' % idx
6941 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6943 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6945 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6946 if self.op.ip_check:
6947 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6948 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6949 (self.check_ip, self.op.instance_name),
6950 errors.ECODE_NOTUNIQUE)
6952 #### mac address generation
6953 # By generating here the mac address both the allocator and the hooks get
6954 # the real final mac address rather than the 'auto' or 'generate' value.
6955 # There is a race condition between the generation and the instance object
6956 # creation, which means that we know the mac is valid now, but we're not
6957 # sure it will be when we actually add the instance. If things go bad
6958 # adding the instance will abort because of a duplicate mac, and the
6959 # creation job will fail.
6960 for nic in self.nics:
6961 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6962 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6966 if self.op.iallocator is not None:
6967 self._RunAllocator()
6969 #### node related checks
6971 # check primary node
6972 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6973 assert self.pnode is not None, \
6974 "Cannot retrieve locked node %s" % self.op.pnode
6976 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6977 pnode.name, errors.ECODE_STATE)
6979 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6980 pnode.name, errors.ECODE_STATE)
6982 self.secondaries = []
6984 # mirror node verification
6985 if self.op.disk_template in constants.DTS_NET_MIRROR:
6986 if self.op.snode is None:
6987 raise errors.OpPrereqError("The networked disk templates need"
6988 " a mirror node", errors.ECODE_INVAL)
6989 if self.op.snode == pnode.name:
6990 raise errors.OpPrereqError("The secondary node cannot be the"
6991 " primary node.", errors.ECODE_INVAL)
6992 _CheckNodeOnline(self, self.op.snode)
6993 _CheckNodeNotDrained(self, self.op.snode)
6994 self.secondaries.append(self.op.snode)
6996 nodenames = [pnode.name] + self.secondaries
6998 req_size = _ComputeDiskSize(self.op.disk_template,
7001 # Check lv size requirements, if not adopting
7002 if req_size is not None and not self.adopt_disks:
7003 _CheckNodesFreeDisk(self, nodenames, req_size)
7005 if self.adopt_disks: # instead, we must check the adoption data
7006 all_lvs = set([i["adopt"] for i in self.disks])
7007 if len(all_lvs) != len(self.disks):
7008 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7010 for lv_name in all_lvs:
7012 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7013 except errors.ReservationError:
7014 raise errors.OpPrereqError("LV named %s used by another instance" %
7015 lv_name, errors.ECODE_NOTUNIQUE)
7017 node_lvs = self.rpc.call_lv_list([pnode.name],
7018 self.cfg.GetVGName())[pnode.name]
7019 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7020 node_lvs = node_lvs.payload
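# The payload maps each LV name to a tuple of attributes; the code below
# uses element 0 as the size (in MB) and element 2 as the "in use/online"
# flag.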
7021 delta = all_lvs.difference(node_lvs.keys())
7023 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7024 utils.CommaJoin(delta),
7026 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7028 raise errors.OpPrereqError("Online logical volumes found, cannot"
7029 " adopt: %s" % utils.CommaJoin(online_lvs),
7031 # update the size of each disk based on what is found
7032 for dsk in self.disks:
7033 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7035 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7037 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7038 # check OS parameters (remotely)
7039 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7041 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7043 # memory check on primary node
7045 _CheckNodeFreeMemory(self, self.pnode.name,
7046 "creating instance %s" % self.op.instance_name,
7047 self.be_full[constants.BE_MEMORY],
7050 self.dry_run_result = list(nodenames)
7052 def Exec(self, feedback_fn):
7053 """Create and add the instance to the cluster.
7056 instance = self.op.instance_name
7057 pnode_name = self.pnode.name
7059 ht_kind = self.op.hypervisor
7060 if ht_kind in constants.HTS_REQ_PORT:
7061 network_port = self.cfg.AllocatePort()
7065 if constants.ENABLE_FILE_STORAGE:
7066 # this is needed because os.path.join does not accept None arguments
7067 if self.op.file_storage_dir is None:
7068 string_file_storage_dir = ""
7070 string_file_storage_dir = self.op.file_storage_dir
7072 # build the full file storage dir path
7073 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7074 string_file_storage_dir, instance)
7076 file_storage_dir = ""
7078 disks = _GenerateDiskTemplate(self,
7079 self.op.disk_template,
7080 instance, pnode_name,
7084 self.op.file_driver,
7087 iobj = objects.Instance(name=instance, os=self.op.os_type,
7088 primary_node=pnode_name,
7089 nics=self.nics, disks=disks,
7090 disk_template=self.op.disk_template,
7092 network_port=network_port,
7093 beparams=self.op.beparams,
7094 hvparams=self.op.hvparams,
7095 hypervisor=self.op.hypervisor,
7096 osparams=self.op.osparams,
7099 if self.adopt_disks:
7100 # rename LVs to the newly-generated names; we need to construct
7101 # 'fake' LV disks with the old data, plus the new unique_id
7102 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7104 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7105 rename_to.append(t_dsk.logical_id)
7106 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7107 self.cfg.SetDiskID(t_dsk, pnode_name)
7108 result = self.rpc.call_blockdev_rename(pnode_name,
7109 zip(tmp_disks, rename_to))
7110 result.Raise("Failed to rename adopted LVs")
7112 feedback_fn("* creating instance disks...")
7114 _CreateDisks(self, iobj)
7115 except errors.OpExecError:
7116 self.LogWarning("Device creation failed, reverting...")
7118 _RemoveDisks(self, iobj)
7120 self.cfg.ReleaseDRBDMinors(instance)
7123 feedback_fn("adding instance %s to cluster config" % instance)
7125 self.cfg.AddInstance(iobj, self.proc.GetECId())
7127 # Declare that we don't want to remove the instance lock anymore, as we've
7128 # added the instance to the config
7129 del self.remove_locks[locking.LEVEL_INSTANCE]
7130 # Unlock all the nodes
7131 if self.op.mode == constants.INSTANCE_IMPORT:
7132 nodes_keep = [self.op.src_node]
7133 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7134 if node != self.op.src_node]
7135 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7136 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7138 self.context.glm.release(locking.LEVEL_NODE)
7139 del self.acquired_locks[locking.LEVEL_NODE]
7141 if self.op.wait_for_sync:
7142 disk_abort = not _WaitForSync(self, iobj)
7143 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7144 # make sure the disks are not degraded (still sync-ing is ok)
7146 feedback_fn("* checking mirrors status")
7147 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7152 _RemoveDisks(self, iobj)
7153 self.cfg.RemoveInstance(iobj.name)
7154 # Make sure the instance lock gets removed
7155 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7156 raise errors.OpExecError("There are some degraded disks for this instance")
7159 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7160 if self.op.mode == constants.INSTANCE_CREATE:
7161 if not self.op.no_install:
7162 feedback_fn("* running the instance OS create scripts...")
7163 # FIXME: pass debug option from opcode to backend
7164 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7165 self.op.debug_level)
7166 result.Raise("Could not add os for instance %s"
7167 " on node %s" % (instance, pnode_name))
7169 elif self.op.mode == constants.INSTANCE_IMPORT:
7170 feedback_fn("* running the instance OS import scripts...")
7174 for idx, image in enumerate(self.src_images):
7178 # FIXME: pass debug option from opcode to backend
7179 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7180 constants.IEIO_FILE, (image, ),
7181 constants.IEIO_SCRIPT,
7182 (iobj.disks[idx], idx),
7184 transfers.append(dt)
7187 masterd.instance.TransferInstanceData(self, feedback_fn,
7188 self.op.src_node, pnode_name,
7189 self.pnode.secondary_ip,
7191 if not compat.all(import_result):
7192 self.LogWarning("Some disks for instance %s on node %s were not"
7193 " imported successfully" % (instance, pnode_name))
7195 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7196 feedback_fn("* preparing remote import...")
7197 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7198 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7200 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7201 self.source_x509_ca,
7202 self._cds, timeouts)
7203 if not compat.all(disk_results):
7204 # TODO: Should the instance still be started, even if some disks
7205 # failed to import (valid for local imports, too)?
7206 self.LogWarning("Some disks for instance %s on node %s were not"
7207 " imported successfully" % (instance, pnode_name))
7209 # Run rename script on newly imported instance
7210 assert iobj.name == instance
7211 feedback_fn("Running rename script for %s" % instance)
7212 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7213 self.source_instance_name,
7214 self.op.debug_level)
7216 self.LogWarning("Failed to run rename script for %s on node"
7217 " %s: %s" % (instance, pnode_name, result.fail_msg))
7220 # also checked in the prereq part
7221 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7225 iobj.admin_up = True
7226 self.cfg.Update(iobj, feedback_fn)
7227 logging.info("Starting instance %s on node %s", instance, pnode_name)
7228 feedback_fn("* starting instance...")
7229 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7230 result.Raise("Could not start instance")
7232 return list(iobj.all_nodes)
7235 class LUConnectConsole(NoHooksLU):
7236 """Connect to an instance's console.
7238 This is somewhat special in that it returns the command line that
7239 you need to run on the master node in order to connect to the console.
7243 _OP_REQP = [("instance_name", _TNonEmptyString)]
7246 def ExpandNames(self):
7247 self._ExpandAndLockInstance()
7249 def CheckPrereq(self):
7250 """Check prerequisites.
7252 This checks that the instance is in the cluster.
7255 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7256 assert self.instance is not None, \
7257 "Cannot retrieve locked instance %s" % self.op.instance_name
7258 _CheckNodeOnline(self, self.instance.primary_node)
7260 def Exec(self, feedback_fn):
7261 """Connect to the console of an instance
7264 instance = self.instance
7265 node = instance.primary_node
7267 node_insts = self.rpc.call_instance_list([node],
7268 [instance.hypervisor])[node]
7269 node_insts.Raise("Can't get node information from %s" % node)
7271 if instance.name not in node_insts.payload:
7272 raise errors.OpExecError("Instance %s is not running." % instance.name)
7274 logging.debug("Connecting to console of %s on %s", instance.name, node)
7276 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7277 cluster = self.cfg.GetClusterInfo()
7278 # beparams and hvparams are passed separately, to avoid editing the
7279 # instance and then saving the defaults in the instance itself.
7280 hvparams = cluster.FillHV(instance)
7281 beparams = cluster.FillBE(instance)
7282 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7285 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7288 class LUReplaceDisks(LogicalUnit):
7289 """Replace the disks of an instance.
7292 HPATH = "mirrors-replace"
7293 HTYPE = constants.HTYPE_INSTANCE
7295 ("instance_name", _TNonEmptyString),
7296 ("mode", _TElemOf(constants.REPLACE_MODES)),
7297 ("disks", _TListOf(_TPositiveInt)),
7300 ("remote_node", None),
7301 ("iallocator", None),
7302 ("early_release", None),
7306 def CheckArguments(self):
7307 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7310 def ExpandNames(self):
7311 self._ExpandAndLockInstance()
7313 if self.op.iallocator is not None:
7314 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7316 elif self.op.remote_node is not None:
7317 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7318 self.op.remote_node = remote_node
7320 # Warning: do not remove the locking of the new secondary here
7321 # unless DRBD8.AddChildren is changed to work in parallel;
7322 # currently it doesn't since parallel invocations of
7323 # FindUnusedMinor will conflict
7324 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7325 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7328 self.needed_locks[locking.LEVEL_NODE] = []
7329 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7331 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7332 self.op.iallocator, self.op.remote_node,
7333 self.op.disks, False, self.op.early_release)
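# The False argument is delay_iallocator (see TLReplaceDisks.__init__
# below): for this LU the allocator runs during CheckPrereq instead of
# being delayed until Exec.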
7335 self.tasklets = [self.replacer]
7337 def DeclareLocks(self, level):
7338 # If we're not already locking all nodes in the set we have to declare the
7339 # instance's primary/secondary nodes.
7340 if (level == locking.LEVEL_NODE and
7341 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7342 self._LockInstancesNodes()
7344 def BuildHooksEnv(self):
7347 This runs on the master, the primary and all the secondaries.
7350 instance = self.replacer.instance
7352 "MODE": self.op.mode,
7353 "NEW_SECONDARY": self.op.remote_node,
7354 "OLD_SECONDARY": instance.secondary_nodes[0],
7356 env.update(_BuildInstanceHookEnvByObject(self, instance))
7358 self.cfg.GetMasterNode(),
7359 instance.primary_node,
7361 if self.op.remote_node is not None:
7362 nl.append(self.op.remote_node)
7366 class TLReplaceDisks(Tasklet):
7367 """Replaces disks for an instance.
7369 Note: Locking is not within the scope of this class.
7372 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7373 disks, delay_iallocator, early_release):
7374 """Initializes this class.
7377 Tasklet.__init__(self, lu)
7380 self.instance_name = instance_name
7382 self.iallocator_name = iallocator_name
7383 self.remote_node = remote_node
7385 self.delay_iallocator = delay_iallocator
7386 self.early_release = early_release
7389 self.instance = None
7390 self.new_node = None
7391 self.target_node = None
7392 self.other_node = None
7393 self.remote_node_info = None
7394 self.node_secondary_ip = None
7397 def CheckArguments(mode, remote_node, iallocator):
7398 """Helper function for users of this class.
7401 # check for valid parameter combination
7402 if mode == constants.REPLACE_DISK_CHG:
7403 if remote_node is None and iallocator is None:
7404 raise errors.OpPrereqError("When changing the secondary either an"
7405 " iallocator script must be used or the"
7406 " new node given", errors.ECODE_INVAL)
7408 if remote_node is not None and iallocator is not None:
7409 raise errors.OpPrereqError("Give either the iallocator or the new"
7410 " secondary, not both", errors.ECODE_INVAL)
7412 elif remote_node is not None or iallocator is not None:
7413 # Not replacing the secondary
7414 raise errors.OpPrereqError("The iallocator and new node options can"
7415 " only be used when changing the"
7416 " secondary node", errors.ECODE_INVAL)
7419 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7420 """Compute a new secondary node using an IAllocator.
7423 ial = IAllocator(lu.cfg, lu.rpc,
7424 mode=constants.IALLOCATOR_MODE_RELOC,
7426 relocate_from=relocate_from)
7428 ial.Run(iallocator_name)
7431 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7432 " %s" % (iallocator_name, ial.info),
7435 if len(ial.result) != ial.required_nodes:
7436 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7437 " of nodes (%s), required %s" %
7439 len(ial.result), ial.required_nodes),
7442 remote_node_name = ial.result[0]
7444 lu.LogInfo("Selected new secondary for instance '%s': %s",
7445 instance_name, remote_node_name)
7447 return remote_node_name
7449 def _FindFaultyDisks(self, node_name):
7450 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7453 def CheckPrereq(self):
7454 """Check prerequisites.
7456 This checks that the instance is in the cluster.
7459 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7460 assert instance is not None, \
7461 "Cannot retrieve locked instance %s" % self.instance_name
7463 if instance.disk_template != constants.DT_DRBD8:
7464 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7465 " instances", errors.ECODE_INVAL)
7467 if len(instance.secondary_nodes) != 1:
7468 raise errors.OpPrereqError("The instance has a strange layout,"
7469 " expected one secondary but found %d" %
7470 len(instance.secondary_nodes),
7473 if not self.delay_iallocator:
7474 self._CheckPrereq2()
7476 def _CheckPrereq2(self):
7477 """Check prerequisites, second part.
7479 This function should always be part of CheckPrereq. It was separated and is
7480 now called from Exec because during node evacuation iallocator was only
7481 called with an unmodified cluster model, not taking planned changes into account.
7485 instance = self.instance
7486 secondary_node = instance.secondary_nodes[0]
7488 if self.iallocator_name is None:
7489 remote_node = self.remote_node
7491 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7492 instance.name, instance.secondary_nodes)
7494 if remote_node is not None:
7495 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7496 assert self.remote_node_info is not None, \
7497 "Cannot retrieve locked node %s" % remote_node
7499 self.remote_node_info = None
7501 if remote_node == self.instance.primary_node:
7502 raise errors.OpPrereqError("The specified node is the primary node of"
7503 " the instance.", errors.ECODE_INVAL)
7505 if remote_node == secondary_node:
7506 raise errors.OpPrereqError("The specified node is already the"
7507 " secondary node of the instance.",
7510 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7511 constants.REPLACE_DISK_CHG):
7512 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7515 if self.mode == constants.REPLACE_DISK_AUTO:
7516 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7517 faulty_secondary = self._FindFaultyDisks(secondary_node)
7519 if faulty_primary and faulty_secondary:
7520 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7521 " one node and cannot be repaired"
7522 " automatically" % self.instance_name,
7526 self.disks = faulty_primary
7527 self.target_node = instance.primary_node
7528 self.other_node = secondary_node
7529 check_nodes = [self.target_node, self.other_node]
7530 elif faulty_secondary:
7531 self.disks = faulty_secondary
7532 self.target_node = secondary_node
7533 self.other_node = instance.primary_node
7534 check_nodes = [self.target_node, self.other_node]
7540 # Non-automatic modes
7541 if self.mode == constants.REPLACE_DISK_PRI:
7542 self.target_node = instance.primary_node
7543 self.other_node = secondary_node
7544 check_nodes = [self.target_node, self.other_node]
7546 elif self.mode == constants.REPLACE_DISK_SEC:
7547 self.target_node = secondary_node
7548 self.other_node = instance.primary_node
7549 check_nodes = [self.target_node, self.other_node]
7551 elif self.mode == constants.REPLACE_DISK_CHG:
7552 self.new_node = remote_node
7553 self.other_node = instance.primary_node
7554 self.target_node = secondary_node
7555 check_nodes = [self.new_node, self.other_node]
7557 _CheckNodeNotDrained(self.lu, remote_node)
7559 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7560 assert old_node_info is not None
7561 if old_node_info.offline and not self.early_release:
7562 # doesn't make sense to delay the release
7563 self.early_release = True
7564 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7565 " early-release mode", secondary_node)
7568 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7571 # If not specified all disks should be replaced
7573 self.disks = range(len(self.instance.disks))
7575 for node in check_nodes:
7576 _CheckNodeOnline(self.lu, node)
7578 # Check whether disks are valid
7579 for disk_idx in self.disks:
7580 instance.FindDisk(disk_idx)
7582 # Get secondary node IP addresses
7585 for node_name in [self.target_node, self.other_node, self.new_node]:
7586 if node_name is not None:
7587 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7589 self.node_secondary_ip = node_2nd_ip
7591 def Exec(self, feedback_fn):
7592 """Execute disk replacement.
7594 This dispatches the disk replacement to the appropriate handler.
7597 if self.delay_iallocator:
7598 self._CheckPrereq2()
7601 feedback_fn("No disks need replacement")
7604 feedback_fn("Replacing disk(s) %s for %s" %
7605 (utils.CommaJoin(self.disks), self.instance.name))
7607 activate_disks = (not self.instance.admin_up)
7609 # Activate the instance disks if we're replacing them on a down instance
7611 _StartInstanceDisks(self.lu, self.instance, True)
7614 # Should we replace the secondary node?
7615 if self.new_node is not None:
7616 fn = self._ExecDrbd8Secondary
7618 fn = self._ExecDrbd8DiskOnly
7620 return fn(feedback_fn)
7623 # Deactivate the instance disks if we're replacing them on a down instance
7626 _SafeShutdownInstanceDisks(self.lu, self.instance)
7628 def _CheckVolumeGroup(self, nodes):
7629 self.lu.LogInfo("Checking volume groups")
7631 vgname = self.cfg.GetVGName()
7633 # Make sure volume group exists on all involved nodes
7634 results = self.rpc.call_vg_list(nodes)
7636 raise errors.OpExecError("Can't list volume groups on the nodes")
7640 res.Raise("Error checking node %s" % node)
7641 if vgname not in res.payload:
7642 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7645 def _CheckDisksExistence(self, nodes):
7646 # Check disk existence
7647 for idx, dev in enumerate(self.instance.disks):
7648 if idx not in self.disks:
7652 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7653 self.cfg.SetDiskID(dev, node)
7655 result = self.rpc.call_blockdev_find(node, dev)
7657 msg = result.fail_msg
7658 if msg or not result.payload:
7660 msg = "disk not found"
7661 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7664 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7665 for idx, dev in enumerate(self.instance.disks):
7666 if idx not in self.disks:
7669 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7672 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7674 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7675 " replace disks for instance %s" %
7676 (node_name, self.instance.name))
7678 def _CreateNewStorage(self, node_name):
7679 vgname = self.cfg.GetVGName()
7682 for idx, dev in enumerate(self.instance.disks):
7683 if idx not in self.disks:
7686 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7688 self.cfg.SetDiskID(dev, node_name)
7690 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7691 names = _GenerateUniqueNames(self.lu, lv_names)
7693 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7694 logical_id=(vgname, names[0]))
7695 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7696 logical_id=(vgname, names[1]))
7698 new_lvs = [lv_data, lv_meta]
7699 old_lvs = dev.children
7700 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
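      # iv_names maps the DRBD device's iv_name to a tuple of
      # (drbd device, current LVs, newly created LVs); the Exec helpers use
      # it to swap the children, verify the devices and remove old storage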
7702 # we pass force_create=True to force the LVM creation
7703 for new_lv in new_lvs:
7704 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7705 _GetInstanceInfoText(self.instance), False)
7709 def _CheckDevices(self, node_name, iv_names):
7710 for name, (dev, _, _) in iv_names.iteritems():
7711 self.cfg.SetDiskID(dev, node_name)
7713 result = self.rpc.call_blockdev_find(node_name, dev)
7715 msg = result.fail_msg
7716 if msg or not result.payload:
7718 msg = "disk not found"
7719 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7722 if result.payload.is_degraded:
7723 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7725 def _RemoveOldStorage(self, node_name, iv_names):
7726 for name, (_, old_lvs, _) in iv_names.iteritems():
7727 self.lu.LogInfo("Remove logical volumes for %s" % name)
7730 self.cfg.SetDiskID(lv, node_name)
7732 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7734 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7735 hint="remove unused LVs manually")
7737 def _ReleaseNodeLock(self, node_name):
7738 """Releases the lock for a given node."""
7739 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7741 def _ExecDrbd8DiskOnly(self, feedback_fn):
7742 """Replace a disk on the primary or secondary for DRBD 8.
7744 The algorithm for replace is quite complicated:
7746 1. for each disk to be replaced:
7748 1. create new LVs on the target node with unique names
7749 1. detach old LVs from the drbd device
7750 1. rename old LVs to name_replaced.<time_t>
7751 1. rename new LVs to old LVs
7752 1. attach the new LVs (with the old names now) to the drbd device
7754 1. wait for sync across all devices
7756 1. for each modified disk:
7758       1. remove old LVs (which have the name name_replaced.<time_t>)

7760 Failures are not very well handled.
7765 # Step: check device activation
7766 self.lu.LogStep(1, steps_total, "Check device existence")
7767 self._CheckDisksExistence([self.other_node, self.target_node])
7768 self._CheckVolumeGroup([self.target_node, self.other_node])
7770 # Step: check other node consistency
7771 self.lu.LogStep(2, steps_total, "Check peer consistency")
7772 self._CheckDisksConsistency(self.other_node,
7773 self.other_node == self.instance.primary_node,
7776 # Step: create new storage
7777 self.lu.LogStep(3, steps_total, "Allocate new storage")
7778 iv_names = self._CreateNewStorage(self.target_node)
7780 # Step: for each lv, detach+rename*2+attach
7781 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7782 for dev, old_lvs, new_lvs in iv_names.itervalues():
7783 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7785 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7787 result.Raise("Can't detach drbd from local storage on node"
7788 " %s for device %s" % (self.target_node, dev.iv_name))
7790 #cfg.Update(instance)
7792 # ok, we created the new LVs, so now we know we have the needed
7793 # storage; as such, we proceed on the target node to rename
7794 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7795 # using the assumption that logical_id == physical_id (which in
7796 # turn is the unique_id on that node)
7798 # FIXME(iustin): use a better name for the replaced LVs
7799 temp_suffix = int(time.time())
7800 ren_fn = lambda d, suff: (d.physical_id[0],
7801 d.physical_id[1] + "_replaced-%s" % suff)
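      # ren_fn keeps the volume group (physical_id[0]) and appends
      # "_replaced-<suffix>" to the LV name (physical_id[1]), i.e.
      # (vg, lv) -> (vg, lv + "_replaced-<time_t>")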
7803 # Build the rename list based on what LVs exist on the node
7804 rename_old_to_new = []
7805 for to_ren in old_lvs:
7806 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7807 if not result.fail_msg and result.payload:
7809 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7811 self.lu.LogInfo("Renaming the old LVs on the target node")
7812 result = self.rpc.call_blockdev_rename(self.target_node,
7814 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7816 # Now we rename the new LVs to the old LVs
7817 self.lu.LogInfo("Renaming the new LVs on the target node")
7818 rename_new_to_old = [(new, old.physical_id)
7819 for old, new in zip(old_lvs, new_lvs)]
7820 result = self.rpc.call_blockdev_rename(self.target_node,
7822 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7824 for old, new in zip(old_lvs, new_lvs):
7825 new.logical_id = old.logical_id
7826 self.cfg.SetDiskID(new, self.target_node)
7828 for disk in old_lvs:
7829 disk.logical_id = ren_fn(disk, temp_suffix)
7830 self.cfg.SetDiskID(disk, self.target_node)
7832 # Now that the new lvs have the old name, we can add them to the device
7833 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7834 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7836 msg = result.fail_msg
7838 for new_lv in new_lvs:
7839 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7842 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7843 hint=("cleanup manually the unused logical"
7845 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7847 dev.children = new_lvs
7849 self.cfg.Update(self.instance, feedback_fn)
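      # persist the new disk children in the cluster configuration before
      # any node locks are (potentially) released below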
7852 if self.early_release:
7853 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7855 self._RemoveOldStorage(self.target_node, iv_names)
7856 # WARNING: we release both node locks here, do not do other RPCs
7857 # than WaitForSync to the primary node
7858 self._ReleaseNodeLock([self.target_node, self.other_node])
7861 # This can fail as the old devices are degraded and _WaitForSync
7862 # does a combined result over all disks, so we don't check its return value
7863 self.lu.LogStep(cstep, steps_total, "Sync devices")
7865 _WaitForSync(self.lu, self.instance)
7867 # Check all devices manually
7868 self._CheckDevices(self.instance.primary_node, iv_names)
7870 # Step: remove old storage
7871 if not self.early_release:
7872 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7874 self._RemoveOldStorage(self.target_node, iv_names)
7876 def _ExecDrbd8Secondary(self, feedback_fn):
7877 """Replace the secondary node for DRBD 8.
7879 The algorithm for replace is quite complicated:
7880 - for all disks of the instance:
7881 - create new LVs on the new node with same names
7882 - shutdown the drbd device on the old secondary
7883 - disconnect the drbd network on the primary
7884 - create the drbd device on the new secondary
7885 - network attach the drbd on the primary, using an artifice:
7886 the drbd code for Attach() will connect to the network if it
7887 finds a device which is connected to the good local disks but
7889 - wait for sync across all devices
7890 - remove all disks from the old secondary
7892 Failures are not very well handled.
7897 # Step: check device activation
7898 self.lu.LogStep(1, steps_total, "Check device existence")
7899 self._CheckDisksExistence([self.instance.primary_node])
7900 self._CheckVolumeGroup([self.instance.primary_node])
7902 # Step: check other node consistency
7903 self.lu.LogStep(2, steps_total, "Check peer consistency")
7904 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7906 # Step: create new storage
7907 self.lu.LogStep(3, steps_total, "Allocate new storage")
7908 for idx, dev in enumerate(self.instance.disks):
7909 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7910 (self.new_node, idx))
7911 # we pass force_create=True to force LVM creation
7912 for new_lv in dev.children:
7913 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7914 _GetInstanceInfoText(self.instance), False)
7916     # Step 4: drbd minors and drbd setup changes
7917 # after this, we must manually remove the drbd minors on both the
7918 # error and the success paths
7919 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7920 minors = self.cfg.AllocateDRBDMinor([self.new_node
7921 for dev in self.instance.disks],
7923 logging.debug("Allocated minors %r", minors)
7926 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7927 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7928 (self.new_node, idx))
7929 # create new devices on new_node; note that we create two IDs:
7930 # one without port, so the drbd will be activated without
7931 # networking information on the new node at this stage, and one
7932 # with network, for the latter activation in step 4
7933 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7934 if self.instance.primary_node == o_node1:
7937 assert self.instance.primary_node == o_node2, "Three-node instance?"
7940 new_alone_id = (self.instance.primary_node, self.new_node, None,
7941 p_minor, new_minor, o_secret)
7942 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7943 p_minor, new_minor, o_secret)
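      # a DRBD8 logical_id is (nodeA, nodeB, port, minorA, minorB, secret);
      # new_alone_id deliberately omits the port so the device is first
      # brought up without networking, while new_net_id is the fully
      # networked variant that ends up in the configuration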
7945 iv_names[idx] = (dev, dev.children, new_net_id)
7946 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7948 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7949 logical_id=new_alone_id,
7950 children=dev.children,
7953 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7954 _GetInstanceInfoText(self.instance), False)
7955 except errors.GenericError:
7956 self.cfg.ReleaseDRBDMinors(self.instance.name)
7959 # We have new devices, shutdown the drbd on the old secondary
7960 for idx, dev in enumerate(self.instance.disks):
7961 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7962 self.cfg.SetDiskID(dev, self.target_node)
7963 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7965 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7966 "node: %s" % (idx, msg),
7967 hint=("Please cleanup this device manually as"
7968 " soon as possible"))
7970 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7971 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7972 self.node_secondary_ip,
7973 self.instance.disks)\
7974 [self.instance.primary_node]
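    # the RPC result is a dict keyed by node name; only the primary node was
    # queried, so its entry is extracted directly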
7976 msg = result.fail_msg
7978 # detaches didn't succeed (unlikely)
7979 self.cfg.ReleaseDRBDMinors(self.instance.name)
7980 raise errors.OpExecError("Can't detach the disks from the network on"
7981 " old node: %s" % (msg,))
7983 # if we managed to detach at least one, we update all the disks of
7984 # the instance to point to the new secondary
7985 self.lu.LogInfo("Updating instance configuration")
7986 for dev, _, new_logical_id in iv_names.itervalues():
7987 dev.logical_id = new_logical_id
7988 self.cfg.SetDiskID(dev, self.instance.primary_node)
7990 self.cfg.Update(self.instance, feedback_fn)
7992 # and now perform the drbd attach
7993 self.lu.LogInfo("Attaching primary drbds to new secondary"
7994 " (standalone => connected)")
7995 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7997 self.node_secondary_ip,
7998 self.instance.disks,
8001 for to_node, to_result in result.items():
8002 msg = to_result.fail_msg
8004 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8006 hint=("please do a gnt-instance info to see the"
8007 " status of disks"))
8009 if self.early_release:
8010 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8012 self._RemoveOldStorage(self.target_node, iv_names)
8013 # WARNING: we release all node locks here, do not do other RPCs
8014 # than WaitForSync to the primary node
8015 self._ReleaseNodeLock([self.instance.primary_node,
8020 # This can fail as the old devices are degraded and _WaitForSync
8021 # does a combined result over all disks, so we don't check its return value
8022 self.lu.LogStep(cstep, steps_total, "Sync devices")
8024 _WaitForSync(self.lu, self.instance)
8026 # Check all devices manually
8027 self._CheckDevices(self.instance.primary_node, iv_names)
8029 # Step: remove old storage
8030 if not self.early_release:
8031 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8032 self._RemoveOldStorage(self.target_node, iv_names)
8035 class LURepairNodeStorage(NoHooksLU):
8036 """Repairs the volume group on a node.
8039 _OP_REQP = [("node_name", _TNonEmptyString)]
8042 def CheckArguments(self):
8043 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8045 _CheckStorageType(self.op.storage_type)
8047 storage_type = self.op.storage_type
8049 if (constants.SO_FIX_CONSISTENCY not in
8050 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8051 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8052 " repaired" % storage_type,
8055 def ExpandNames(self):
8056 self.needed_locks = {
8057 locking.LEVEL_NODE: [self.op.node_name],
8060 def _CheckFaultyDisks(self, instance, node_name):
8061 """Ensure faulty disks abort the opcode or at least warn."""
8063 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8065 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8066 " node '%s'" % (instance.name, node_name),
8068 except errors.OpPrereqError, err:
8069 if self.op.ignore_consistency:
8070 self.proc.LogWarning(str(err.args[0]))
8074 def CheckPrereq(self):
8075 """Check prerequisites.
8078 # Check whether any instance on this node has faulty disks
8079 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8080 if not inst.admin_up:
8082 check_nodes = set(inst.all_nodes)
8083 check_nodes.discard(self.op.node_name)
8084 for inst_node_name in check_nodes:
8085 self._CheckFaultyDisks(inst, inst_node_name)
8087 def Exec(self, feedback_fn):
8088 feedback_fn("Repairing storage unit '%s' on %s ..." %
8089 (self.op.name, self.op.node_name))
8091 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8092 result = self.rpc.call_storage_execute(self.op.node_name,
8093 self.op.storage_type, st_args,
8095 constants.SO_FIX_CONSISTENCY)
8096 result.Raise("Failed to repair storage unit '%s' on %s" %
8097 (self.op.name, self.op.node_name))
8100 class LUNodeEvacuationStrategy(NoHooksLU):
8101 """Computes the node evacuation strategy.
8104 _OP_REQP = [("nodes", _TListOf(_TNonEmptyString))]
8106 ("remote_node", None),
8107 ("iallocator", None),
8111 def CheckArguments(self):
8112 if self.op.remote_node is not None and self.op.iallocator is not None:
8113 raise errors.OpPrereqError("Give either the iallocator or the new"
8114 " secondary, not both", errors.ECODE_INVAL)
8116 def ExpandNames(self):
8117 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8118 self.needed_locks = locks = {}
8119 if self.op.remote_node is None:
8120 locks[locking.LEVEL_NODE] = locking.ALL_SET
8122 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8123 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8125 def Exec(self, feedback_fn):
8126 if self.op.remote_node is not None:
8128 for node in self.op.nodes:
8129 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8132 if i.primary_node == self.op.remote_node:
8133 raise errors.OpPrereqError("Node %s is the primary node of"
8134 " instance %s, cannot use it as"
8136 (self.op.remote_node, i.name),
8138 result.append([i.name, self.op.remote_node])
8140 ial = IAllocator(self.cfg, self.rpc,
8141 mode=constants.IALLOCATOR_MODE_MEVAC,
8142 evac_nodes=self.op.nodes)
8143 ial.Run(self.op.iallocator, validate=True)
8145 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8151 class LUGrowDisk(LogicalUnit):
8152 """Grow a disk of an instance.
8156 HTYPE = constants.HTYPE_INSTANCE
8158 ("instance_name", _TNonEmptyString),
8161 ("wait_for_sync", _TBool),
8165 def ExpandNames(self):
8166 self._ExpandAndLockInstance()
8167 self.needed_locks[locking.LEVEL_NODE] = []
8168 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8170 def DeclareLocks(self, level):
8171 if level == locking.LEVEL_NODE:
8172 self._LockInstancesNodes()
8174 def BuildHooksEnv(self):
8177 This runs on the master, the primary and all the secondaries.
8181 "DISK": self.op.disk,
8182 "AMOUNT": self.op.amount,
8184 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8185 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8188 def CheckPrereq(self):
8189 """Check prerequisites.
8191 This checks that the instance is in the cluster.
8194 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8195 assert instance is not None, \
8196 "Cannot retrieve locked instance %s" % self.op.instance_name
8197 nodenames = list(instance.all_nodes)
8198 for node in nodenames:
8199 _CheckNodeOnline(self, node)
8201 self.instance = instance
8203 if instance.disk_template not in constants.DTS_GROWABLE:
8204 raise errors.OpPrereqError("Instance's disk layout does not support"
8205 " growing.", errors.ECODE_INVAL)
8207 self.disk = instance.FindDisk(self.op.disk)
8209 if instance.disk_template != constants.DT_FILE:
8210 # TODO: check the free disk space for file, when that feature will be
8212 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
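      # every node of the instance must have at least self.op.amount of free
      # space for the grow to succeed (the check is skipped for file-based
      # storage above)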
8214 def Exec(self, feedback_fn):
8215 """Execute disk grow.
8218 instance = self.instance
8221 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8223 raise errors.OpExecError("Cannot activate block device to grow")
8225 for node in instance.all_nodes:
8226 self.cfg.SetDiskID(disk, node)
8227 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8228 result.Raise("Grow request failed to node %s" % node)
8230 # TODO: Rewrite code to work properly
8231 # DRBD goes into sync mode for a short amount of time after executing the
8232 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8233 # calling "resize" in sync mode fails. Sleeping for a short amount of
8234 # time is a work-around.
8237 disk.RecordGrow(self.op.amount)
8238 self.cfg.Update(instance, feedback_fn)
8239 if self.op.wait_for_sync:
8240 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8242 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8243 " status.\nPlease check the instance.")
8244 if not instance.admin_up:
8245 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8246 elif not instance.admin_up:
8247       self.proc.LogWarning("Not shutting down the disk even though the"
8248                            " instance is not supposed to be running, because"
8249                            " wait-for-sync mode was not requested.")
8252 class LUQueryInstanceData(NoHooksLU):
8253 """Query runtime instance data.
8257 ("instances", _TListOf(_TNonEmptyString)),
8262 def ExpandNames(self):
8263 self.needed_locks = {}
8264 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8266 if self.op.instances:
8267 self.wanted_names = []
8268 for name in self.op.instances:
8269 full_name = _ExpandInstanceName(self.cfg, name)
8270 self.wanted_names.append(full_name)
8271 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8273 self.wanted_names = None
8274 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8276 self.needed_locks[locking.LEVEL_NODE] = []
8277 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8279 def DeclareLocks(self, level):
8280 if level == locking.LEVEL_NODE:
8281 self._LockInstancesNodes()
8283 def CheckPrereq(self):
8284 """Check prerequisites.
8286 This only checks the optional instance list against the existing names.
8289 if self.wanted_names is None:
8290 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8292 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8293 in self.wanted_names]
8295 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8296 """Returns the status of a block device
8299 if self.op.static or not node:
8302 self.cfg.SetDiskID(dev, node)
8304 result = self.rpc.call_blockdev_find(node, dev)
8308 result.Raise("Can't compute disk status for %s" % instance_name)
8310 status = result.payload
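    # flatten the remote BlockDevStatus payload into the plain tuple format
    # returned to query clients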
8314 return (status.dev_path, status.major, status.minor,
8315 status.sync_percent, status.estimated_time,
8316 status.is_degraded, status.ldisk_status)
8318 def _ComputeDiskStatus(self, instance, snode, dev):
8319 """Compute block device status.
8322 if dev.dev_type in constants.LDS_DRBD:
8323 # we change the snode then (otherwise we use the one passed in)
8324 if dev.logical_id[0] == instance.primary_node:
8325 snode = dev.logical_id[1]
8327 snode = dev.logical_id[0]
8329 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8331 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8334 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8335 for child in dev.children]
8340 "iv_name": dev.iv_name,
8341 "dev_type": dev.dev_type,
8342 "logical_id": dev.logical_id,
8343 "physical_id": dev.physical_id,
8344 "pstatus": dev_pstatus,
8345 "sstatus": dev_sstatus,
8346 "children": dev_children,
8353 def Exec(self, feedback_fn):
8354 """Gather and return data"""
8357 cluster = self.cfg.GetClusterInfo()
8359 for instance in self.wanted_instances:
8360 if not self.op.static:
8361 remote_info = self.rpc.call_instance_info(instance.primary_node,
8363 instance.hypervisor)
8364 remote_info.Raise("Error checking node %s" % instance.primary_node)
8365 remote_info = remote_info.payload
8366 if remote_info and "state" in remote_info:
8369 remote_state = "down"
8372 if instance.admin_up:
8375 config_state = "down"
8377 disks = [self._ComputeDiskStatus(instance, None, device)
8378 for device in instance.disks]
8381 "name": instance.name,
8382 "config_state": config_state,
8383 "run_state": remote_state,
8384 "pnode": instance.primary_node,
8385 "snodes": instance.secondary_nodes,
8387 # this happens to be the same format used for hooks
8388 "nics": _NICListToTuple(self, instance.nics),
8389 "disk_template": instance.disk_template,
8391 "hypervisor": instance.hypervisor,
8392 "network_port": instance.network_port,
8393 "hv_instance": instance.hvparams,
8394 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8395 "be_instance": instance.beparams,
8396 "be_actual": cluster.FillBE(instance),
8397 "os_instance": instance.osparams,
8398 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8399 "serial_no": instance.serial_no,
8400 "mtime": instance.mtime,
8401 "ctime": instance.ctime,
8402 "uuid": instance.uuid,
8405 result[instance.name] = idict
8410 class LUSetInstanceParams(LogicalUnit):
8411 """Modifies an instances's parameters.
8414 HPATH = "instance-modify"
8415 HTYPE = constants.HTYPE_INSTANCE
8416 _OP_REQP = [("instance_name", _TNonEmptyString)]
8418 ("nics", _EmptyList),
8419 ("disks", _EmptyList),
8420 ("beparams", _EmptyDict),
8421 ("hvparams", _EmptyDict),
8422 ("disk_template", None),
8423 ("remote_node", None),
8425 ("force_variant", False),
8431 def CheckArguments(self):
8432 if not (self.op.nics or self.op.disks or self.op.disk_template or
8433 self.op.hvparams or self.op.beparams or self.op.os_name):
8434 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8436 if self.op.hvparams:
8437 _CheckGlobalHvParams(self.op.hvparams)
8441 for disk_op, disk_dict in self.op.disks:
8442 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8443 if disk_op == constants.DDM_REMOVE:
8446 elif disk_op == constants.DDM_ADD:
8449 if not isinstance(disk_op, int):
8450 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8451 if not isinstance(disk_dict, dict):
8452 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8453 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8455 if disk_op == constants.DDM_ADD:
8456 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8457 if mode not in constants.DISK_ACCESS_SET:
8458 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8460 size = disk_dict.get('size', None)
8462 raise errors.OpPrereqError("Required disk parameter size missing",
8466 except (TypeError, ValueError), err:
8467 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8468 str(err), errors.ECODE_INVAL)
8469 disk_dict['size'] = size
8471 # modification of disk
8472 if 'size' in disk_dict:
8473 raise errors.OpPrereqError("Disk size change not possible, use"
8474 " grow-disk", errors.ECODE_INVAL)
8476 if disk_addremove > 1:
8477 raise errors.OpPrereqError("Only one disk add or remove operation"
8478 " supported at a time", errors.ECODE_INVAL)
8480 if self.op.disks and self.op.disk_template is not None:
8481 raise errors.OpPrereqError("Disk template conversion and other disk"
8482 " changes not supported at the same time",
8485 if self.op.disk_template:
8486 _CheckDiskTemplate(self.op.disk_template)
8487 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8488 self.op.remote_node is None):
8489 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8490 " one requires specifying a secondary node",
8495 for nic_op, nic_dict in self.op.nics:
8496 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8497 if nic_op == constants.DDM_REMOVE:
8500 elif nic_op == constants.DDM_ADD:
8503 if not isinstance(nic_op, int):
8504 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8505 if not isinstance(nic_dict, dict):
8506 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8507 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8509 # nic_dict should be a dict
8510 nic_ip = nic_dict.get('ip', None)
8511 if nic_ip is not None:
8512 if nic_ip.lower() == constants.VALUE_NONE:
8513 nic_dict['ip'] = None
8515 if not utils.IsValidIP(nic_ip):
8516 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8519 nic_bridge = nic_dict.get('bridge', None)
8520 nic_link = nic_dict.get('link', None)
8521 if nic_bridge and nic_link:
8522 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8523 " at the same time", errors.ECODE_INVAL)
8524 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8525 nic_dict['bridge'] = None
8526 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8527 nic_dict['link'] = None
8529 if nic_op == constants.DDM_ADD:
8530 nic_mac = nic_dict.get('mac', None)
8532 nic_dict['mac'] = constants.VALUE_AUTO
8534 if 'mac' in nic_dict:
8535 nic_mac = nic_dict['mac']
8536 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8537 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8539 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8540 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8541 " modifying an existing nic",
8544 if nic_addremove > 1:
8545 raise errors.OpPrereqError("Only one NIC add or remove operation"
8546 " supported at a time", errors.ECODE_INVAL)
8548 def ExpandNames(self):
8549 self._ExpandAndLockInstance()
8550 self.needed_locks[locking.LEVEL_NODE] = []
8551 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8553 def DeclareLocks(self, level):
8554 if level == locking.LEVEL_NODE:
8555 self._LockInstancesNodes()
8556 if self.op.disk_template and self.op.remote_node:
8557 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8558 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8560 def BuildHooksEnv(self):
8563 This runs on the master, primary and secondaries.
8567 if constants.BE_MEMORY in self.be_new:
8568 args['memory'] = self.be_new[constants.BE_MEMORY]
8569 if constants.BE_VCPUS in self.be_new:
8570 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8571 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8572 # information at all.
8575 nic_override = dict(self.op.nics)
8576 for idx, nic in enumerate(self.instance.nics):
8577 if idx in nic_override:
8578 this_nic_override = nic_override[idx]
8580 this_nic_override = {}
8581 if 'ip' in this_nic_override:
8582 ip = this_nic_override['ip']
8585 if 'mac' in this_nic_override:
8586 mac = this_nic_override['mac']
8589 if idx in self.nic_pnew:
8590 nicparams = self.nic_pnew[idx]
8592 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8593 mode = nicparams[constants.NIC_MODE]
8594 link = nicparams[constants.NIC_LINK]
8595 args['nics'].append((ip, mac, mode, link))
8596 if constants.DDM_ADD in nic_override:
8597 ip = nic_override[constants.DDM_ADD].get('ip', None)
8598 mac = nic_override[constants.DDM_ADD]['mac']
8599 nicparams = self.nic_pnew[constants.DDM_ADD]
8600 mode = nicparams[constants.NIC_MODE]
8601 link = nicparams[constants.NIC_LINK]
8602 args['nics'].append((ip, mac, mode, link))
8603 elif constants.DDM_REMOVE in nic_override:
8604 del args['nics'][-1]
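      # removing a NIC always drops the last one (see Exec below), so mirror
      # that in the hook environment by dropping the last entry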
8606 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8607 if self.op.disk_template:
8608 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8609 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8612 def CheckPrereq(self):
8613 """Check prerequisites.
8615 This only checks the instance list against the existing names.
8618 # checking the new params on the primary/secondary nodes
8620 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8621 cluster = self.cluster = self.cfg.GetClusterInfo()
8622 assert self.instance is not None, \
8623 "Cannot retrieve locked instance %s" % self.op.instance_name
8624 pnode = instance.primary_node
8625 nodelist = list(instance.all_nodes)
8628 if self.op.os_name and not self.op.force:
8629 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8630 self.op.force_variant)
8631 instance_os = self.op.os_name
8633 instance_os = instance.os
8635 if self.op.disk_template:
8636 if instance.disk_template == self.op.disk_template:
8637 raise errors.OpPrereqError("Instance already has disk template %s" %
8638 instance.disk_template, errors.ECODE_INVAL)
8640 if (instance.disk_template,
8641 self.op.disk_template) not in self._DISK_CONVERSIONS:
8642 raise errors.OpPrereqError("Unsupported disk template conversion from"
8643 " %s to %s" % (instance.disk_template,
8644 self.op.disk_template),
8646 if self.op.disk_template in constants.DTS_NET_MIRROR:
8647 _CheckNodeOnline(self, self.op.remote_node)
8648 _CheckNodeNotDrained(self, self.op.remote_node)
8649 disks = [{"size": d.size} for d in instance.disks]
8650 required = _ComputeDiskSize(self.op.disk_template, disks)
8651 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8652 _CheckInstanceDown(self, instance, "cannot change disk template")
8654 # hvparams processing
8655 if self.op.hvparams:
8656 hv_type = instance.hypervisor
8657 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8658 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8659 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8662 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8663 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8664 self.hv_new = hv_new # the new actual values
8665 self.hv_inst = i_hvdict # the new dict (without defaults)
8667 self.hv_new = self.hv_inst = {}
8669 # beparams processing
8670 if self.op.beparams:
8671 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8673 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8674 be_new = cluster.SimpleFillBE(i_bedict)
8675 self.be_new = be_new # the new actual values
8676 self.be_inst = i_bedict # the new dict (without defaults)
8678 self.be_new = self.be_inst = {}
8680 # osparams processing
8681 if self.op.osparams:
8682 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8683 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8684 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8685 self.os_inst = i_osdict # the new dict (without defaults)
8687 self.os_new = self.os_inst = {}
8691 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8692 mem_check_list = [pnode]
8693 if be_new[constants.BE_AUTO_BALANCE]:
8694 # either we changed auto_balance to yes or it was from before
8695 mem_check_list.extend(instance.secondary_nodes)
8696 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8697 instance.hypervisor)
8698 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8699 instance.hypervisor)
8700 pninfo = nodeinfo[pnode]
8701 msg = pninfo.fail_msg
8703 # Assume the primary node is unreachable and go ahead
8704 self.warn.append("Can't get info from primary node %s: %s" %
8706 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8707 self.warn.append("Node data from primary node %s doesn't contain"
8708 " free memory information" % pnode)
8709 elif instance_info.fail_msg:
8710 self.warn.append("Can't get instance runtime information: %s" %
8711 instance_info.fail_msg)
8713 if instance_info.payload:
8714 current_mem = int(instance_info.payload['memory'])
8716 # Assume instance not running
8717 # (there is a slight race condition here, but it's not very probable,
8718 # and we have no other way to check)
8720 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8721 pninfo.payload['memory_free'])
8723 raise errors.OpPrereqError("This change will prevent the instance"
8724 " from starting, due to %d MB of memory"
8725 " missing on its primary node" % miss_mem,
8728 if be_new[constants.BE_AUTO_BALANCE]:
8729 for node, nres in nodeinfo.items():
8730 if node not in instance.secondary_nodes:
8734 self.warn.append("Can't get info from secondary node %s: %s" %
8736 elif not isinstance(nres.payload.get('memory_free', None), int):
8737 self.warn.append("Secondary node %s didn't return free"
8738 " memory information" % node)
8739 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8740 self.warn.append("Not enough memory to failover instance to"
8741 " secondary node %s" % node)
8746 for nic_op, nic_dict in self.op.nics:
8747 if nic_op == constants.DDM_REMOVE:
8748 if not instance.nics:
8749 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8752 if nic_op != constants.DDM_ADD:
8754 if not instance.nics:
8755 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8756 " no NICs" % nic_op,
8758 if nic_op < 0 or nic_op >= len(instance.nics):
8759 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8761 (nic_op, len(instance.nics) - 1),
8763 old_nic_params = instance.nics[nic_op].nicparams
8764 old_nic_ip = instance.nics[nic_op].ip
8769 update_params_dict = dict([(key, nic_dict[key])
8770 for key in constants.NICS_PARAMETERS
8771 if key in nic_dict])
8773 if 'bridge' in nic_dict:
8774 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
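        # 'bridge' is accepted as a backwards-compatible alias and is folded
        # into the generic 'link' NIC parameter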
8776 new_nic_params = _GetUpdatedParams(old_nic_params,
8778 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8779 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8780 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8781 self.nic_pinst[nic_op] = new_nic_params
8782 self.nic_pnew[nic_op] = new_filled_nic_params
8783 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8785 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8786 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8787 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8789 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8791 self.warn.append(msg)
8793 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8794 if new_nic_mode == constants.NIC_MODE_ROUTED:
8795 if 'ip' in nic_dict:
8796 nic_ip = nic_dict['ip']
8800 raise errors.OpPrereqError('Cannot set the nic ip to None'
8801 ' on a routed nic', errors.ECODE_INVAL)
8802 if 'mac' in nic_dict:
8803 nic_mac = nic_dict['mac']
8805 raise errors.OpPrereqError('Cannot set the nic mac to None',
8807 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8808 # otherwise generate the mac
8809 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8811 # or validate/reserve the current one
8813 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8814 except errors.ReservationError:
8815 raise errors.OpPrereqError("MAC address %s already in use"
8816 " in cluster" % nic_mac,
8817 errors.ECODE_NOTUNIQUE)
8820 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8821 raise errors.OpPrereqError("Disk operations not supported for"
8822 " diskless instances",
8824 for disk_op, _ in self.op.disks:
8825 if disk_op == constants.DDM_REMOVE:
8826 if len(instance.disks) == 1:
8827 raise errors.OpPrereqError("Cannot remove the last disk of"
8828 " an instance", errors.ECODE_INVAL)
8829 _CheckInstanceDown(self, instance, "cannot remove disks")
8831 if (disk_op == constants.DDM_ADD and
8832           len(instance.disks) >= constants.MAX_DISKS):
8833 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8834 " add more" % constants.MAX_DISKS,
8836 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8838 if disk_op < 0 or disk_op >= len(instance.disks):
8839 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8841 (disk_op, len(instance.disks)),
8846 def _ConvertPlainToDrbd(self, feedback_fn):
8847 """Converts an instance from plain to drbd.
8850 feedback_fn("Converting template to drbd")
8851 instance = self.instance
8852 pnode = instance.primary_node
8853 snode = self.op.remote_node
8855 # create a fake disk info for _GenerateDiskTemplate
8856 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8857 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8858 instance.name, pnode, [snode],
8859 disk_info, None, None, 0)
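    # each generated DRBD8 disk takes over the existing data LV as its first
    # child (children[0]) and uses a freshly named LV as its meta device
    # (children[1]); the code below only creates what is missing and then
    # renames the original volumes into place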
8860 info = _GetInstanceInfoText(instance)
8861 feedback_fn("Creating aditional volumes...")
8862 # first, create the missing data and meta devices
8863 for disk in new_disks:
8864 # unfortunately this is... not too nice
8865 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8867 for child in disk.children:
8868 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8869 # at this stage, all new LVs have been created, we can rename the
8871 feedback_fn("Renaming original volumes...")
8872 rename_list = [(o, n.children[0].logical_id)
8873 for (o, n) in zip(instance.disks, new_disks)]
8874 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8875 result.Raise("Failed to rename original LVs")
8877 feedback_fn("Initializing DRBD devices...")
8878 # all child devices are in place, we can now create the DRBD devices
8879 for disk in new_disks:
8880 for node in [pnode, snode]:
8881 f_create = node == pnode
8882 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8884 # at this point, the instance has been modified
8885 instance.disk_template = constants.DT_DRBD8
8886 instance.disks = new_disks
8887 self.cfg.Update(instance, feedback_fn)
8889 # disks are created, waiting for sync
8890 disk_abort = not _WaitForSync(self, instance)
8892 raise errors.OpExecError("There are some degraded disks for"
8893 " this instance, please cleanup manually")
8895 def _ConvertDrbdToPlain(self, feedback_fn):
8896 """Converts an instance from drbd to plain.
8899 instance = self.instance
8900 assert len(instance.secondary_nodes) == 1
8901 pnode = instance.primary_node
8902 snode = instance.secondary_nodes[0]
8903 feedback_fn("Converting template to plain")
8905 old_disks = instance.disks
8906 new_disks = [d.children[0] for d in old_disks]
8908 # copy over size and mode
8909 for parent, child in zip(old_disks, new_disks):
8910 child.size = parent.size
8911 child.mode = parent.mode
8913 # update instance structure
8914 instance.disks = new_disks
8915 instance.disk_template = constants.DT_PLAIN
8916 self.cfg.Update(instance, feedback_fn)
8918 feedback_fn("Removing volumes on the secondary node...")
8919 for disk in old_disks:
8920 self.cfg.SetDiskID(disk, snode)
8921 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8923 self.LogWarning("Could not remove block device %s on node %s,"
8924 " continuing anyway: %s", disk.iv_name, snode, msg)
8926 feedback_fn("Removing unneeded volumes on the primary node...")
8927 for idx, disk in enumerate(old_disks):
8928 meta = disk.children[1]
8929 self.cfg.SetDiskID(meta, pnode)
8930 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8932 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8933 " continuing anyway: %s", idx, pnode, msg)
8936 def Exec(self, feedback_fn):
8937 """Modifies an instance.
8939 All parameters take effect only at the next restart of the instance.
8942 # Process here the warnings from CheckPrereq, as we don't have a
8943 # feedback_fn there.
8944 for warn in self.warn:
8945 feedback_fn("WARNING: %s" % warn)
8948 instance = self.instance
8950 for disk_op, disk_dict in self.op.disks:
8951 if disk_op == constants.DDM_REMOVE:
8952 # remove the last disk
8953 device = instance.disks.pop()
8954 device_idx = len(instance.disks)
8955 for node, disk in device.ComputeNodeTree(instance.primary_node):
8956 self.cfg.SetDiskID(disk, node)
8957 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8959 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8960 " continuing anyway", device_idx, node, msg)
8961 result.append(("disk/%d" % device_idx, "remove"))
8962 elif disk_op == constants.DDM_ADD:
8964 if instance.disk_template == constants.DT_FILE:
8965 file_driver, file_path = instance.disks[0].logical_id
8966 file_path = os.path.dirname(file_path)
8968 file_driver = file_path = None
8969 disk_idx_base = len(instance.disks)
8970 new_disk = _GenerateDiskTemplate(self,
8971 instance.disk_template,
8972 instance.name, instance.primary_node,
8973 instance.secondary_nodes,
8978 instance.disks.append(new_disk)
8979 info = _GetInstanceInfoText(instance)
8981 logging.info("Creating volume %s for instance %s",
8982 new_disk.iv_name, instance.name)
8983 # Note: this needs to be kept in sync with _CreateDisks
8985 for node in instance.all_nodes:
8986 f_create = node == instance.primary_node
8988 _CreateBlockDev(self, node, instance, new_disk,
8989 f_create, info, f_create)
8990 except errors.OpExecError, err:
8991 self.LogWarning("Failed to create volume %s (%s) on"
8993 new_disk.iv_name, new_disk, node, err)
8994 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8995 (new_disk.size, new_disk.mode)))
8997 # change a given disk
8998 instance.disks[disk_op].mode = disk_dict['mode']
8999 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9001 if self.op.disk_template:
9002 r_shut = _ShutdownInstanceDisks(self, instance)
9004 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9005 " proceed with disk template conversion")
9006 mode = (instance.disk_template, self.op.disk_template)
9008 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9010 self.cfg.ReleaseDRBDMinors(instance.name)
9012 result.append(("disk_template", self.op.disk_template))
9015 for nic_op, nic_dict in self.op.nics:
9016 if nic_op == constants.DDM_REMOVE:
9017 # remove the last nic
9018 del instance.nics[-1]
9019 result.append(("nic.%d" % len(instance.nics), "remove"))
9020 elif nic_op == constants.DDM_ADD:
9021         # mac and bridge should be set by now
9022 mac = nic_dict['mac']
9023 ip = nic_dict.get('ip', None)
9024 nicparams = self.nic_pinst[constants.DDM_ADD]
9025 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9026 instance.nics.append(new_nic)
9027 result.append(("nic.%d" % (len(instance.nics) - 1),
9028 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9029 (new_nic.mac, new_nic.ip,
9030 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9031 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9034 for key in 'mac', 'ip':
9036 setattr(instance.nics[nic_op], key, nic_dict[key])
9037 if nic_op in self.nic_pinst:
9038 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9039 for key, val in nic_dict.iteritems():
9040 result.append(("nic.%s/%d" % (key, nic_op), val))
9043 if self.op.hvparams:
9044 instance.hvparams = self.hv_inst
9045 for key, val in self.op.hvparams.iteritems():
9046 result.append(("hv/%s" % key, val))
9049 if self.op.beparams:
9050 instance.beparams = self.be_inst
9051 for key, val in self.op.beparams.iteritems():
9052 result.append(("be/%s" % key, val))
9056 instance.os = self.op.os_name
9059 if self.op.osparams:
9060 instance.osparams = self.os_inst
9061 for key, val in self.op.osparams.iteritems():
9062 result.append(("os/%s" % key, val))
9064 self.cfg.Update(instance, feedback_fn)
9068 _DISK_CONVERSIONS = {
9069 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9070 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9074 class LUQueryExports(NoHooksLU):
9075 """Query the exports list
9078 _OP_REQP = [("nodes", _TListOf(_TNonEmptyString))]
9081 def ExpandNames(self):
9082 self.needed_locks = {}
9083 self.share_locks[locking.LEVEL_NODE] = 1
9084 if not self.op.nodes:
9085 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9087 self.needed_locks[locking.LEVEL_NODE] = \
9088 _GetWantedNodes(self, self.op.nodes)
9090 def Exec(self, feedback_fn):
9091 """Compute the list of all the exported system images.
9094 @return: a dictionary with the structure node->(export-list)
9095 where export-list is a list of the instances exported on
9099 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9100 rpcresult = self.rpc.call_export_list(self.nodes)
9102 for node in rpcresult:
9103 if rpcresult[node].fail_msg:
9104 result[node] = False
9106 result[node] = rpcresult[node].payload
9111 class LUPrepareExport(NoHooksLU):
9112 """Prepares an instance for an export and returns useful information.
9116 ("instance_name", _TNonEmptyString),
9117 ("mode", _TElemOf(constants.EXPORT_MODES)),
9121 def ExpandNames(self):
9122 self._ExpandAndLockInstance()
9124 def CheckPrereq(self):
9125 """Check prerequisites.
9128 instance_name = self.op.instance_name
9130 self.instance = self.cfg.GetInstanceInfo(instance_name)
9131 assert self.instance is not None, \
9132 "Cannot retrieve locked instance %s" % self.op.instance_name
9133 _CheckNodeOnline(self, self.instance.primary_node)
9135 self._cds = _GetClusterDomainSecret()
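    # the cluster domain secret is used below (in Exec) to compute the remote
    # export handshake, HMAC-sign the X509 key name and sign the generated CA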
9137 def Exec(self, feedback_fn):
9138 """Prepares an instance for an export.
9141 instance = self.instance
9143 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9144 salt = utils.GenerateSecret(8)
9146 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9147 result = self.rpc.call_x509_cert_create(instance.primary_node,
9148 constants.RIE_CERT_VALIDITY)
9149 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9151 (name, cert_pem) = result.payload
9153 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9157 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9158 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9160 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9166 class LUExportInstance(LogicalUnit):
9167 """Export an instance to an image in the cluster.
9170 HPATH = "instance-export"
9171 HTYPE = constants.HTYPE_INSTANCE
9173 ("instance_name", _TNonEmptyString),
9174 ("target_node", _TNonEmptyString),
9175 ("shutdown", _TBool),
9176 ("mode", _TElemOf(constants.EXPORT_MODES)),
9179 ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT),
9180 ("remove_instance", False),
9181 ("ignore_remove_failures", False),
9182 ("mode", constants.EXPORT_MODE_LOCAL),
9183 ("x509_key_name", None),
9184 ("destination_x509_ca", None),
9188 def CheckArguments(self):
9189 """Check the arguments.
9192 self.x509_key_name = self.op.x509_key_name
9193 self.dest_x509_ca_pem = self.op.destination_x509_ca
9195 if self.op.remove_instance and not self.op.shutdown:
9196 raise errors.OpPrereqError("Can not remove instance without shutting it"
9199 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9200 if not self.x509_key_name:
9201 raise errors.OpPrereqError("Missing X509 key name for encryption",
9204 if not self.dest_x509_ca_pem:
9205 raise errors.OpPrereqError("Missing destination X509 CA",
9208 def ExpandNames(self):
9209 self._ExpandAndLockInstance()
9211 # Lock all nodes for local exports
9212 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9213 # FIXME: lock only instance primary and destination node
9215       # Sad but true, for now we have to lock all nodes, as we don't know where
9216 # the previous export might be, and in this LU we search for it and
9217 # remove it from its current node. In the future we could fix this by:
9218 # - making a tasklet to search (share-lock all), then create the
9219       #   new one, then remove the old one afterwards
9220 # - removing the removal operation altogether
9221 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9223 def DeclareLocks(self, level):
9224 """Last minute lock declaration."""
9225 # All nodes are locked anyway, so nothing to do here.
9227 def BuildHooksEnv(self):
9230 This will run on the master, primary node and target node.
9234 "EXPORT_MODE": self.op.mode,
9235 "EXPORT_NODE": self.op.target_node,
9236 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9237 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9238 # TODO: Generic function for boolean env variables
9239 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9242 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9244 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9246 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9247 nl.append(self.op.target_node)
9251 def CheckPrereq(self):
9252 """Check prerequisites.
9254 This checks that the instance and node names are valid.
9257 instance_name = self.op.instance_name
9259 self.instance = self.cfg.GetInstanceInfo(instance_name)
9260 assert self.instance is not None, \
9261 "Cannot retrieve locked instance %s" % self.op.instance_name
9262 _CheckNodeOnline(self, self.instance.primary_node)
9264 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9265 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9266 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9267 assert self.dst_node is not None
9269 _CheckNodeOnline(self, self.dst_node.name)
9270 _CheckNodeNotDrained(self, self.dst_node.name)
9273 self.dest_disk_info = None
9274 self.dest_x509_ca = None
9276 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9277 self.dst_node = None
9279 if len(self.op.target_node) != len(self.instance.disks):
9280 raise errors.OpPrereqError(("Received destination information for %s"
9281 " disks, but instance %s has %s disks") %
9282 (len(self.op.target_node), instance_name,
9283 len(self.instance.disks)),
9286 cds = _GetClusterDomainSecret()
9288 # Check X509 key name
9290 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9291 except (TypeError, ValueError), err:
9292 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9294 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9295 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9298 # Load and verify CA
9300 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9301 except OpenSSL.crypto.Error, err:
9302 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9303 (err, ), errors.ECODE_INVAL)
9305 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9306 if errcode is not None:
9307 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9308 (msg, ), errors.ECODE_INVAL)
9310 self.dest_x509_ca = cert
9312 # Verify target information
9314 for idx, disk_data in enumerate(self.op.target_node):
9316 (host, port, magic) = \
9317 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9318 except errors.GenericError, err:
9319 raise errors.OpPrereqError("Target info for disk %s: %s" %
9320 (idx, err), errors.ECODE_INVAL)
9322 disk_info.append((host, port, magic))
9324 assert len(disk_info) == len(self.op.target_node)
9325 self.dest_disk_info = disk_info
9328 raise errors.ProgrammerError("Unhandled export mode %r" %
9331 # instance disk type verification
9332 # TODO: Implement export support for file-based disks
9333 for disk in self.instance.disks:
9334 if disk.dev_type == constants.LD_FILE:
9335 raise errors.OpPrereqError("Export not supported for instances with"
9336 " file-based disks", errors.ECODE_INVAL)
9338 def _CleanupExports(self, feedback_fn):
9339 """Removes exports of current instance from all other nodes.
9341 If an instance in a cluster with nodes A..D was exported to node C, its
9342 exports will be removed from the nodes A, B and D.
9345 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9347 nodelist = self.cfg.GetNodeList()
9348 nodelist.remove(self.dst_node.name)
9350 # on one-node clusters nodelist will be empty after the removal
9351 # if we proceed the backup would be removed because OpQueryExports
9352 # substitutes an empty list with the full cluster node list.
9353 iname = self.instance.name
9355 feedback_fn("Removing old exports for instance %s" % iname)
9356 exportlist = self.rpc.call_export_list(nodelist)
9357 for node in exportlist:
9358 if exportlist[node].fail_msg:
9360 if iname in exportlist[node].payload:
9361 msg = self.rpc.call_export_remove(node, iname).fail_msg
9363 self.LogWarning("Could not remove older export for instance %s"
9364 " on node %s: %s", iname, node, msg)
9366 def Exec(self, feedback_fn):
9367 """Export an instance to an image in the cluster.
9370 assert self.op.mode in constants.EXPORT_MODES
9372 instance = self.instance
9373 src_node = instance.primary_node
9375 if self.op.shutdown:
9376 # shutdown the instance, but not the disks
9377 feedback_fn("Shutting down instance %s" % instance.name)
9378 result = self.rpc.call_instance_shutdown(src_node, instance,
9379 self.op.shutdown_timeout)
9380 # TODO: Maybe ignore failures if ignore_remove_failures is set
9381 result.Raise("Could not shutdown instance %s on"
9382 " node %s" % (instance.name, src_node))
9384 # set the disks ID correctly since call_instance_start needs the
9385 # correct drbd minor to create the symlinks
9386 for disk in instance.disks:
9387 self.cfg.SetDiskID(disk, src_node)
9389 activate_disks = (not instance.admin_up)
9392       # Activate the instance disks if we're exporting a stopped instance
9393 feedback_fn("Activating disks for %s" % instance.name)
9394 _StartInstanceDisks(self, instance, None)
9397 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9400 helper.CreateSnapshots()
9402 if (self.op.shutdown and instance.admin_up and
9403 not self.op.remove_instance):
9404 assert not activate_disks
9405 feedback_fn("Starting instance %s" % instance.name)
9406 result = self.rpc.call_instance_start(src_node, instance, None, None)
9407 msg = result.fail_msg
9409 feedback_fn("Failed to start instance: %s" % msg)
9410 _ShutdownInstanceDisks(self, instance)
9411 raise errors.OpExecError("Could not start instance: %s" % msg)
9413 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9414 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9415 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9416 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9417 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9419 (key_name, _, _) = self.x509_key_name
9422 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9425 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9426 key_name, dest_ca_pem,
9431 # Check for backwards compatibility
9432 assert len(dresults) == len(instance.disks)
9433 assert compat.all(isinstance(i, bool) for i in dresults), \
9434 "Not all results are boolean: %r" % dresults
9438 feedback_fn("Deactivating disks for %s" % instance.name)
9439 _ShutdownInstanceDisks(self, instance)
9441 # Remove instance if requested
9442 if self.op.remove_instance:
9443 if not (compat.all(dresults) and fin_resu):
9444 feedback_fn("Not removing instance %s as parts of the export failed" %
9447 feedback_fn("Removing instance %s" % instance.name)
9448 _RemoveInstance(self, feedback_fn, instance,
9449 self.op.ignore_remove_failures)
9451 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9452 self._CleanupExports(feedback_fn)
9454 return fin_resu, dresults
9457 class LURemoveExport(NoHooksLU):
9458 """Remove exports related to the named instance.
9461 _OP_REQP = [("instance_name", _TNonEmptyString)]
9464 def ExpandNames(self):
9465 self.needed_locks = {}
9466 # We need all nodes to be locked in order for RemoveExport to work, but we
9467 # don't need to lock the instance itself, as nothing will happen to it (and
9468 # we can remove exports also for a removed instance)
9469 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9471 def Exec(self, feedback_fn):
9472 """Remove any export.
9475 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9476 # If the instance was not found we'll try with the name that was passed in.
9477 # This will only work if it was an FQDN, though.
9478 fqdn_warn = False
9479 if not instance_name:
9480 fqdn_warn = True
9481 instance_name = self.op.instance_name
9483 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9484 exportlist = self.rpc.call_export_list(locked_nodes)
9485 found = False
9486 for node in exportlist:
9487 msg = exportlist[node].fail_msg
9488 if msg:
9489 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9490 continue
9491 if instance_name in exportlist[node].payload:
9492 found = True
9493 result = self.rpc.call_export_remove(node, instance_name)
9494 msg = result.fail_msg
9495 if msg:
9496 logging.error("Could not remove export for instance %s"
9497 " on node %s: %s", instance_name, node, msg)
9499 if fqdn_warn and not found:
9500 feedback_fn("Export not found. If trying to remove an export belonging"
9501 " to a deleted instance please use its Fully Qualified"
9505 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9508 This is an abstract class which is the parent of all the other tags LUs.
9512 def ExpandNames(self):
9513 self.needed_locks = {}
9514 if self.op.kind == constants.TAG_NODE:
9515 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9516 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9517 elif self.op.kind == constants.TAG_INSTANCE:
9518 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9519 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9521 def CheckPrereq(self):
9522 """Check prerequisites.
9525 if self.op.kind == constants.TAG_CLUSTER:
9526 self.target = self.cfg.GetClusterInfo()
9527 elif self.op.kind == constants.TAG_NODE:
9528 self.target = self.cfg.GetNodeInfo(self.op.name)
9529 elif self.op.kind == constants.TAG_INSTANCE:
9530 self.target = self.cfg.GetInstanceInfo(self.op.name)
9531 else:
9532 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9533 str(self.op.kind), errors.ECODE_INVAL)
9536 class LUGetTags(TagsLU):
9537 """Returns the tags of a given object.
9541 ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9542 ("name", _TNonEmptyString),
9546 def Exec(self, feedback_fn):
9547 """Returns the tag list.
9550 return list(self.target.GetTags())
9553 class LUSearchTags(NoHooksLU):
9554 """Searches the tags for a given pattern.
9557 _OP_REQP = [("pattern", _TNonEmptyString)]
9560 def ExpandNames(self):
9561 self.needed_locks = {}
9563 def CheckPrereq(self):
9564 """Check prerequisites.
9566 This checks the pattern passed for validity by compiling it.
9568 """
9569 try:
9570 self.re = re.compile(self.op.pattern)
9571 except re.error, err:
9572 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9573 (self.op.pattern, err), errors.ECODE_INVAL)
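# For example, a pattern such as "web[" fails to compile and is rejected
# here with ECODE_INVAL before any tags are looked at.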
9575 def Exec(self, feedback_fn):
9576 """Returns the tag list.
9580 tgts = [("/cluster", cfg.GetClusterInfo())]
9581 ilist = cfg.GetAllInstancesInfo().values()
9582 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9583 nlist = cfg.GetAllNodesInfo().values()
9584 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9585 results = []
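# results will collect (path, tag) pairs, e.g. ("/instances/inst1", "web"),
# one pair for every tag matching the compiled pattern.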
9586 for path, target in tgts:
9587 for tag in target.GetTags():
9588 if self.re.search(tag):
9589 results.append((path, tag))
9590 return results
9593 class LUAddTags(TagsLU):
9594 """Sets a tag on a given object.
9598 ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9599 ("name", _TNonEmptyString),
9600 ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
9604 def CheckPrereq(self):
9605 """Check prerequisites.
9607 This checks the type and length of the tag name and value.
9610 TagsLU.CheckPrereq(self)
9611 for tag in self.op.tags:
9612 objects.TaggableObject.ValidateTag(tag)
9614 def Exec(self, feedback_fn):
9618 try:
9619 for tag in self.op.tags:
9620 self.target.AddTag(tag)
9621 except errors.TagError, err:
9622 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9623 self.cfg.Update(self.target, feedback_fn)
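# Update() writes the modified object back into the cluster configuration,
# bumping its serial number, and the change is then distributed as usual.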
9626 class LUDelTags(TagsLU):
9627 """Delete a list of tags from a given object.
9631 ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9632 ("name", _TNonEmptyString),
9633 ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
9637 def CheckPrereq(self):
9638 """Check prerequisites.
9640 This checks that we have the given tag.
9643 TagsLU.CheckPrereq(self)
9644 for tag in self.op.tags:
9645 objects.TaggableObject.ValidateTag(tag)
9646 del_tags = frozenset(self.op.tags)
9647 cur_tags = self.target.GetTags()
9648 if not del_tags <= cur_tags:
9649 diff_tags = del_tags - cur_tags
9650 diff_names = ["'%s'" % tag for tag in diff_tags]
9652 raise errors.OpPrereqError("Tag(s) %s not found" %
9653 (",".join(diff_names)), errors.ECODE_NOENT)
9655 def Exec(self, feedback_fn):
9656 """Remove the tag from the object.
9659 for tag in self.op.tags:
9660 self.target.RemoveTag(tag)
9661 self.cfg.Update(self.target, feedback_fn)
9664 class LUTestDelay(NoHooksLU):
9665 """Sleep for a specified amount of time.
9667 This LU sleeps on the master and/or nodes for a specified amount of
9668 time.
9672 ("duration", _TFloat),
9673 ("on_master", _TBool),
9674 ("on_nodes", _TListOf(_TNonEmptyString)),
9675 ("repeat", _TPositiveInt)
9682 def ExpandNames(self):
9683 """Expand names and set required locks.
9685 This expands the node list, if any.
9688 self.needed_locks = {}
9689 if self.op.on_nodes:
9690 # _GetWantedNodes can be used here, but is not always appropriate to use
9691 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9692 # more information.
9693 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9694 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9696 def _TestDelay(self):
9697 """Do the actual sleep.
9700 if self.op.on_master:
9701 if not utils.TestDelay(self.op.duration):
9702 raise errors.OpExecError("Error during master delay test")
9703 if self.op.on_nodes:
9704 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9705 for node, node_result in result.items():
9706 node_result.Raise("Failure during rpc call to node %s" % node)
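# This LU backs the "gnt-debug delay" command: the master-side sleep happens
# in this process, while per-node delays go through the test_delay RPC above.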
9708 def Exec(self, feedback_fn):
9709 """Execute the test delay opcode, with the wanted repetitions.
9712 if self.op.repeat == 0:
9713 self._TestDelay()
9714 else:
9715 top_value = self.op.repeat - 1
9716 for i in range(self.op.repeat):
9717 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9718 self._TestDelay()
9721 class IAllocator(object):
9722 """IAllocator framework.
9724 An IAllocator instance has four sets of attributes:
9725 - cfg that is needed to query the cluster
9726 - input data (all members of the _KEYS class attribute are required)
9727 - four buffer attributes (in|out_data|text), that represent the
9728 input (to the external script) in text and data structure format,
9729 and the output from it, again in two formats
9730 - the result variables from the script (success, info, nodes) for
9731 easy usage
9734 # pylint: disable-msg=R0902
9735 # lots of instance attributes
9737 "name", "mem_size", "disks", "disk_template",
9738 "os", "tags", "nics", "vcpus", "hypervisor",
9741 "name", "relocate_from",
9747 def __init__(self, cfg, rpc, mode, **kwargs):
9748 self.cfg = cfg
9749 self.rpc = rpc
9750 # init buffer variables
9751 self.in_text = self.out_text = self.in_data = self.out_data = None
9752 # init all input fields so that pylint is happy
9753 self.mode = mode
9754 self.mem_size = self.disks = self.disk_template = None
9755 self.os = self.tags = self.nics = self.vcpus = None
9756 self.hypervisor = None
9757 self.relocate_from = None
9758 self.name = None
9759 self.evac_nodes = None
9761 self.required_nodes = None
9762 # init result fields
9763 self.success = self.info = self.result = None
9764 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9765 keyset = self._ALLO_KEYS
9766 fn = self._AddNewInstance
9767 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9768 keyset = self._RELO_KEYS
9769 fn = self._AddRelocateInstance
9770 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9771 keyset = self._EVAC_KEYS
9772 fn = self._AddEvacuateNodes
9773 else:
9774 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9775 " IAllocator" % self.mode)
9776 for key in kwargs:
9777 if key not in keyset:
9778 raise errors.ProgrammerError("Invalid input parameter '%s' to"
9779 " IAllocator" % key)
9780 setattr(self, key, kwargs[key])
9782 for key in keyset:
9783 if key not in kwargs:
9784 raise errors.ProgrammerError("Missing input parameter '%s' to"
9785 " IAllocator" % key)
9786 self._BuildInputData(fn)
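# Illustrative use, mirroring LUTestAllocator.Exec below (assumes an
# allocator script named "hail" is installed on the master):
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
#                    name="inst1.example.com",
#                    relocate_from=["node2.example.com"])
#   ial.Run("hail")
#   if ial.success:
#     new_secondary = ial.result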
9788 def _ComputeClusterData(self):
9789 """Compute the generic allocator input data.
9791 This is the data that is independent of the actual operation.
9793 """
9794 cfg = self.cfg
9795 cluster_info = cfg.GetClusterInfo()
9797 data = {
9798 "version": constants.IALLOCATOR_VERSION,
9799 "cluster_name": cfg.GetClusterName(),
9800 "cluster_tags": list(cluster_info.GetTags()),
9801 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9802 # we don't have job IDs
9804 iinfo = cfg.GetAllInstancesInfo().values()
9805 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9808 node_results = {}
9809 node_list = cfg.GetNodeList()
9811 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9812 hypervisor_name = self.hypervisor
9813 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9814 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9815 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9816 hypervisor_name = cluster_info.enabled_hypervisors[0]
9818 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9819 hypervisor_name)
9820 node_iinfo = \
9821 self.rpc.call_all_instances_info(node_list,
9822 cluster_info.enabled_hypervisors)
9823 for nname, nresult in node_data.items():
9824 # first fill in static (config-based) values
9825 ninfo = cfg.GetNodeInfo(nname)
9826 pnr = {
9827 "tags": list(ninfo.GetTags()),
9828 "primary_ip": ninfo.primary_ip,
9829 "secondary_ip": ninfo.secondary_ip,
9830 "offline": ninfo.offline,
9831 "drained": ninfo.drained,
9832 "master_candidate": ninfo.master_candidate,
9835 if not (ninfo.offline or ninfo.drained):
9836 nresult.Raise("Can't get data for node %s" % nname)
9837 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9838 nname)
9839 remote_info = nresult.payload
9841 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9842 'vg_size', 'vg_free', 'cpu_total']:
9843 if attr not in remote_info:
9844 raise errors.OpExecError("Node '%s' didn't return attribute"
9845 " '%s'" % (nname, attr))
9846 if not isinstance(remote_info[attr], int):
9847 raise errors.OpExecError("Node '%s' returned invalid value"
9848 " for '%s': %s" %
9849 (nname, attr, remote_info[attr]))
9850 # compute memory used by primary instances
9851 i_p_mem = i_p_up_mem = 0
9852 for iinfo, beinfo in i_list:
9853 if iinfo.primary_node == nname:
9854 i_p_mem += beinfo[constants.BE_MEMORY]
9855 if iinfo.name not in node_iinfo[nname].payload:
9856 i_used_mem = 0
9857 else:
9858 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9859 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9860 remote_info['memory_free'] -= max(0, i_mem_diff)
9862 if iinfo.admin_up:
9863 i_p_up_mem += beinfo[constants.BE_MEMORY]
9865 # compute memory used by instances
9867 "total_memory": remote_info['memory_total'],
9868 "reserved_memory": remote_info['memory_dom0'],
9869 "free_memory": remote_info['memory_free'],
9870 "total_disk": remote_info['vg_size'],
9871 "free_disk": remote_info['vg_free'],
9872 "total_cpus": remote_info['cpu_total'],
9873 "i_pri_memory": i_p_mem,
9874 "i_pri_up_memory": i_p_up_mem,
9878 node_results[nname] = pnr
9879 data["nodes"] = node_results
9883 for iinfo, beinfo in i_list:
9884 nic_data = []
9885 for nic in iinfo.nics:
9886 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
9887 nic_dict = {"mac": nic.mac,
9889 "mode": filled_params[constants.NIC_MODE],
9890 "link": filled_params[constants.NIC_LINK],
9892 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9893 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9894 nic_data.append(nic_dict)
9896 "tags": list(iinfo.GetTags()),
9897 "admin_up": iinfo.admin_up,
9898 "vcpus": beinfo[constants.BE_VCPUS],
9899 "memory": beinfo[constants.BE_MEMORY],
9901 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9903 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9904 "disk_template": iinfo.disk_template,
9905 "hypervisor": iinfo.hypervisor,
9907 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9909 instance_data[iinfo.name] = pir
9911 data["instances"] = instance_data
9913 self.in_data = data
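# At this point "data" (stored as self.in_data) describes the whole cluster:
# version, cluster settings, node and instance details; _BuildInputData later
# serializes it together with the mode-specific "request".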
9915 def _AddNewInstance(self):
9916 """Add new instance data to allocator structure.
9918 This in combination with _ComputeClusterData will create the
9919 correct structure needed as input for the allocator.
9921 The checks for the completeness of the opcode must have already been
9922 done.
9925 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9927 if self.disk_template in constants.DTS_NET_MIRROR:
9928 self.required_nodes = 2
9929 else:
9930 self.required_nodes = 1
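# Network-mirrored disk templates (e.g. DRBD) need a primary/secondary node
# pair; all other templates need only a single node.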
9933 "disk_template": self.disk_template,
9936 "vcpus": self.vcpus,
9937 "memory": self.mem_size,
9938 "disks": self.disks,
9939 "disk_space_total": disk_space,
9941 "required_nodes": self.required_nodes,
9945 def _AddRelocateInstance(self):
9946 """Add relocate instance data to allocator structure.
9948 This in combination with _ComputeClusterData will create the
9949 correct structure needed as input for the allocator.
9951 The checks for the completeness of the opcode must have already been
9952 done.
9955 instance = self.cfg.GetInstanceInfo(self.name)
9956 if instance is None:
9957 raise errors.ProgrammerError("Unknown instance '%s' passed to"
9958 " IAllocator" % self.name)
9960 if instance.disk_template not in constants.DTS_NET_MIRROR:
9961 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9964 if len(instance.secondary_nodes) != 1:
9965 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
9968 self.required_nodes = 1
9969 disk_sizes = [{'size': disk.size} for disk in instance.disks]
9970 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9974 "disk_space_total": disk_space,
9975 "required_nodes": self.required_nodes,
9976 "relocate_from": self.relocate_from,
9980 def _AddEvacuateNodes(self):
9981 """Add evacuate nodes data to allocator structure.
9985 "evac_nodes": self.evac_nodes
9989 def _BuildInputData(self, fn):
9990 """Build input data structures.
9993 self._ComputeClusterData()
9995 request = fn()
9996 request["type"] = self.mode
9997 self.in_data["request"] = request
9999 self.in_text = serializer.Dump(self.in_data)
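# serializer.Dump turns the request into the JSON text that is handed to the
# external allocator; self.in_data keeps the structured form around.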
10001 def Run(self, name, validate=True, call_fn=None):
10002 """Run an instance allocator and return the results.
10005 if call_fn is None:
10006 call_fn = self.rpc.call_iallocator_runner
10008 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10009 result.Raise("Failure while running the iallocator script")
10011 self.out_text = result.payload
10012 if validate:
10013 self._ValidateResult()
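# The iallocator runner RPC executes the named allocator script on the master
# node with the serialized input and returns the script's output as the
# payload; the exact invocation lives in the node daemon (backend) code.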
10015 def _ValidateResult(self):
10016 """Process the allocator results.
10018 This will process and if successful save the result in
10019 self.out_data and the other parameters.
10023 rdict = serializer.Load(self.out_text)
10024 except Exception, err:
10025 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10027 if not isinstance(rdict, dict):
10028 raise errors.OpExecError("Can't parse iallocator results: not a dict")
10030 # TODO: remove backwards compatibility in later versions
10031 if "nodes" in rdict and "result" not in rdict:
10032 rdict["result"] = rdict["nodes"]
10035 for key in "success", "info", "result":
10036 if key not in rdict:
10037 raise errors.OpExecError("Can't parse iallocator results:"
10038 " missing key '%s'" % key)
10039 setattr(self, key, rdict[key])
10041 if not isinstance(rdict["result"], list):
10042 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10043 " not a list")
10044 self.out_data = rdict
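# Illustrative well-formed reply from an allocator script:
#   {"success": true, "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}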
10047 class LUTestAllocator(NoHooksLU):
10048 """Run allocator tests.
10050 This LU runs the allocator tests
10054 ("direction", _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10055 ("mode", _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10056 ("name", _TNonEmptyString),
10057 ("nics", _TOr(_TNone, _TListOf(
10058 _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10059 _TOr(_TNone, _TNonEmptyString))))),
10060 ("disks", _TOr(_TNone, _TList)),
10063 ("hypervisor", None),
10064 ("allocator", None),
10069 def CheckPrereq(self):
10070 """Check prerequisites.
10072 This checks the opcode parameters depending on the direction and mode test.
10075 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10076 for attr in ["mem_size", "disks", "disk_template",
10077 "os", "tags", "nics", "vcpus"]:
10078 if not hasattr(self.op, attr):
10079 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10080 attr, errors.ECODE_INVAL)
10081 iname = self.cfg.ExpandInstanceName(self.op.name)
10082 if iname is not None:
10083 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10084 iname, errors.ECODE_EXISTS)
10085 if not isinstance(self.op.nics, list):
10086 raise errors.OpPrereqError("Invalid parameter 'nics'",
10087 errors.ECODE_INVAL)
10088 if not isinstance(self.op.disks, list):
10089 raise errors.OpPrereqError("Invalid parameter 'disks'",
10090 errors.ECODE_INVAL)
10091 for row in self.op.disks:
10092 if (not isinstance(row, dict) or
10093 "size" not in row or
10094 not isinstance(row["size"], int) or
10095 "mode" not in row or
10096 row["mode"] not in ['r', 'w']):
10097 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10098 " parameter", errors.ECODE_INVAL)
10099 if self.op.hypervisor is None:
10100 self.op.hypervisor = self.cfg.GetHypervisorType()
10101 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10102 fname = _ExpandInstanceName(self.cfg, self.op.name)
10103 self.op.name = fname
10104 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10105 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10106 if not hasattr(self.op, "evac_nodes"):
10107 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10108 " opcode input", errors.ECODE_INVAL)
10109 else:
10110 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10111 self.op.mode, errors.ECODE_INVAL)
10113 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10114 if self.op.allocator is None:
10115 raise errors.OpPrereqError("Missing allocator name",
10116 errors.ECODE_INVAL)
10117 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10118 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10119 self.op.direction, errors.ECODE_INVAL)
10121 def Exec(self, feedback_fn):
10122 """Run the allocator test.
10125 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10126 ial = IAllocator(self.cfg, self.rpc,
10129 mem_size=self.op.mem_size,
10130 disks=self.op.disks,
10131 disk_template=self.op.disk_template,
10135 vcpus=self.op.vcpus,
10136 hypervisor=self.op.hypervisor,
10138 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10139 ial = IAllocator(self.cfg, self.rpc,
10142 relocate_from=list(self.relocate_from),
10144 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10145 ial = IAllocator(self.cfg, self.rpc,
10147 evac_nodes=self.op.evac_nodes)
10149 raise errors.ProgrammerError("Unhandled mode %s in"
10150 " LUTestAllocator.Exec", self.op.mode)
10152 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10153 result = ial.in_text
10154 else:
10155 ial.Run(self.op.allocator, validate=False)
10156 result = ial.out_text
10157 return result
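# With direction "in" the LU only returns the generated allocator input text;
# with direction "out" it runs the named allocator (skipping reply validation)
# and returns the raw output.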