code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39
  40 from ganeti import ssh
  41 from ganeti import utils
  42 from ganeti import errors
  43 from ganeti import hypervisor
  44 from ganeti import locking
  45 from ganeti import constants
  46 from ganeti import objects
  47 from ganeti import serializer
  48 from ganeti import ssconf
  49 from ganeti import uidpool
  50 from ganeti import compat
  51 from ganeti import masterd
  52
  53 import ganeti.masterd.instance # pylint: disable-msg=W0611
  54
  55
  56 # Modifiable default values; need to define these here before the
  57 # actual LUs
  58
  59 def _EmptyList():
  60   """Returns an empty list.
  61
  62   """
  63   return []
  64
  65
  66 def _EmptyDict():
  67   """Returns an empty dict.
  68
  69   """
  70   return {}
  71
  72
  73 #: The without-default default value
  74 _NoDefault = object()
  75
  76
  77 #: The no-type (value to complex to check it in the type system)
  78 _NoType = object()
  79
  80
  81 # Some basic types
  82 def _TNotNone(val):
  83   """Checks if the given value is not None.
  84
  85   """
  86   return val is not None
  87
  88
  89 def _TNone(val):
  90   """Checks if the given value is None.
  91
  92   """
  93   return val is None
  94
  95
  96 def _TBool(val):
  97   """Checks if the given value is a boolean.
  98
  99   """
 100   return isinstance(val, bool)
 101
 102
 103 def _TInt(val):
 104   """Checks if the given value is an integer.
 105
 106   """
 107   return isinstance(val, int)
 108
 109
 110 def _TFloat(val):
 111   """Checks if the given value is a float.
 112
 113   """
 114   return isinstance(val, float)
 115
 116
 117 def _TString(val):
 118   """Checks if the given value is a string.
 119
 120   """
 121   return isinstance(val, basestring)
 122
 123
 124 def _TTrue(val):
 125   """Checks if a given value evaluates to a boolean True value.
 126
 127   """
 128   return bool(val)
 129
 130
 131 def _TElemOf(target_list):
 132   """Builds a function that checks if a given value is a member of a list.
 133
 134   """
 135   return lambda val: val in target_list
 136
 137
 138 # Container types
 139 def _TList(val):
 140   """Checks if the given value is a list.
 141
 142   """
 143   return isinstance(val, list)
 144
 145
 146 def _TDict(val):
 147   """Checks if the given value is a dictionary.
 148
 149   """
 150   return isinstance(val, dict)
 151
 152
 153 # Combinator types
 154 def _TAnd(*args):
 155   """Combine multiple functions using an AND operation.
 156
 157   """
 158   def fn(val):
 159     return compat.all(t(val) for t in args)
 160   return fn
 161
 162
 163 def _TOr(*args):
 164   """Combine multiple functions using an AND operation.
 165
 166   """
 167   def fn(val):
 168     return compat.any(t(val) for t in args)
 169   return fn
 170
 171
 172 # Type aliases
 173
 174 #: a non-empty string
 175 _TNonEmptyString = _TAnd(_TString, _TTrue)
 176
 177
 178 #: a maybe non-empty string
 179 _TMaybeString = _TOr(_TNonEmptyString, _TNone)
 180
 181
 182 #: a maybe boolean (bool or none)
 183 _TMaybeBool = _TOr(_TBool, _TNone)
 184
 185
 186 #: a positive integer
 187 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
 188
 189 #: a strictly positive integer
 190 _TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
 191
 192
 193 def _TListOf(my_type):
 194   """Checks if a given value is a list with all elements of the same type.
 195
 196   """
 197   return _TAnd(_TList,
 198                lambda lst: compat.all(my_type(v) for v in lst))
 199
 200
 201 def _TDictOf(key_type, val_type):
 202   """Checks a dict type for the type of its key/values.
 203
 204   """
 205   return _TAnd(_TDict,
 206                lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
 207                                 and compat.all(val_type(v)
 208                                                for v in my_dict.values())))
 209
 210
 211 # Common opcode attributes
 212
 213 #: output fields for a query operation
 214 _POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
 215
 216
 217 #: the shutdown timeout
 218 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
 219                      _TPositiveInt)
 220
 221 #: the force parameter
 222 _PForce = ("force", False, _TBool)
 223
 224 #: a required instance name (for single-instance LUs)
 225 _PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
 226
 227
 228 #: a required node name (for single-node LUs)
 229 _PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
 230
 231
 232 # End types
 233 class LogicalUnit(object):
 234   """Logical Unit base class.
 235
 236   Subclasses must follow these rules:
 237     - implement ExpandNames
 238     - implement CheckPrereq (except when tasklets are used)
 239     - implement Exec (except when tasklets are used)
 240     - implement BuildHooksEnv
 241     - redefine HPATH and HTYPE
 242     - optionally redefine their run requirements:
 243         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 244
 245   Note that all commands require root permissions.
 246
 247   @ivar dry_run_result: the value (if any) that will be returned to the caller
 248       in dry-run mode (signalled by opcode dry_run parameter)
 249   @cvar _OP_PARAMS: a list of opcode attributes, their defaults values
 250       they should get if not already defined, and types they must match
 251
 252   """
 253   HPATH = None
 254   HTYPE = None
 255   _OP_PARAMS = []
 256   REQ_BGL = True
 257
 258   def __init__(self, processor, op, context, rpc):
 259     """Constructor for LogicalUnit.
 260
 261     This needs to be overridden in derived classes in order to check op
 262     validity.
 263
 264     """
 265     self.proc = processor
 266     self.op = op
 267     self.cfg = context.cfg
 268     self.context = context
 269     self.rpc = rpc
 270     # Dicts used to declare locking needs to mcpu
 271     self.needed_locks = None
 272     self.acquired_locks = {}
 273     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 274     self.add_locks = {}
 275     self.remove_locks = {}
 276     # Used to force good behavior when calling helper functions
 277     self.recalculate_locks = {}
 278     self.__ssh = None
 279     # logging
 280     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 281     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 282     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 283     # support for dry-run
 284     self.dry_run_result = None
 285     # support for generic debug attribute
 286     if (not hasattr(self.op, "debug_level") or
 287         not isinstance(self.op.debug_level, int)):
 288       self.op.debug_level = 0
 289
 290     # Tasklets
 291     self.tasklets = None
 292
 293     # The new kind-of-type-system
 294     op_id = self.op.OP_ID
 295     for attr_name, aval, test in self._OP_PARAMS:
 296       if not hasattr(op, attr_name):
 297         if aval == _NoDefault:
 298           raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
 299                                      (op_id, attr_name), errors.ECODE_INVAL)
 300         else:
 301           if callable(aval):
 302             dval = aval()
 303           else:
 304             dval = aval
 305           setattr(self.op, attr_name, dval)
 306       attr_val = getattr(op, attr_name)
 307       if test == _NoType:
 308         # no tests here
 309         continue
 310       if not callable(test):
 311         raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
 312                                      " given type is not a proper type (%s)" %
 313                                      (op_id, attr_name, test))
 314       if not test(attr_val):
 315         logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
 316                       self.op.OP_ID, attr_name, type(attr_val), attr_val)
 317         raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
 318                                    (op_id, attr_name), errors.ECODE_INVAL)
 319
 320     self.CheckArguments()
 321
 322   def __GetSSH(self):
 323     """Returns the SshRunner object
 324
 325     """
 326     if not self.__ssh:
 327       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 328     return self.__ssh
 329
 330   ssh = property(fget=__GetSSH)
 331
 332   def CheckArguments(self):
 333     """Check syntactic validity for the opcode arguments.
 334
 335     This method is for doing a simple syntactic check and ensure
 336     validity of opcode parameters, without any cluster-related
 337     checks. While the same can be accomplished in ExpandNames and/or
 338     CheckPrereq, doing these separate is better because:
 339
 340       - ExpandNames is left as as purely a lock-related function
 341       - CheckPrereq is run after we have acquired locks (and possible
 342         waited for them)
 343
 344     The function is allowed to change the self.op attribute so that
 345     later methods can no longer worry about missing parameters.
 346
 347     """
 348     pass
 349
 350   def ExpandNames(self):
 351     """Expand names for this LU.
 352
 353     This method is called before starting to execute the opcode, and it should
 354     update all the parameters of the opcode to their canonical form (e.g. a
 355     short node name must be fully expanded after this method has successfully
 356     completed). This way locking, hooks, logging, ecc. can work correctly.
 357
 358     LUs which implement this method must also populate the self.needed_locks
 359     member, as a dict with lock levels as keys, and a list of needed lock names
 360     as values. Rules:
 361
 362       - use an empty dict if you don't need any lock
 363       - if you don't need any lock at a particular level omit that level
 364       - don't put anything for the BGL level
 365       - if you want all locks at a level use locking.ALL_SET as a value
 366
 367     If you need to share locks (rather than acquire them exclusively) at one
 368     level you can modify self.share_locks, setting a true value (usually 1) for
 369     that level. By default locks are not shared.
 370
 371     This function can also define a list of tasklets, which then will be
 372     executed in order instead of the usual LU-level CheckPrereq and Exec
 373     functions, if those are not defined by the LU.
 374
 375     Examples::
 376
 377       # Acquire all nodes and one instance
 378       self.needed_locks = {
 379         locking.LEVEL_NODE: locking.ALL_SET,
 380         locking.LEVEL_INSTANCE: ['instance1.example.tld'],
 381       }
 382       # Acquire just two nodes
 383       self.needed_locks = {
 384         locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
 385       }
 386       # Acquire no locks
 387       self.needed_locks = {} # No, you can't leave it to the default value None
 388
 389     """
 390     # The implementation of this method is mandatory only if the new LU is
 391     # concurrent, so that old LUs don't need to be changed all at the same
 392     # time.
 393     if self.REQ_BGL:
 394       self.needed_locks = {} # Exclusive LUs don't need locks.
 395     else:
 396       raise NotImplementedError
 397
 398   def DeclareLocks(self, level):
 399     """Declare LU locking needs for a level
 400
 401     While most LUs can just declare their locking needs at ExpandNames time,
 402     sometimes there's the need to calculate some locks after having acquired
 403     the ones before. This function is called just before acquiring locks at a
 404     particular level, but after acquiring the ones at lower levels, and permits
 405     such calculations. It can be used to modify self.needed_locks, and by
 406     default it does nothing.
 407
 408     This function is only called if you have something already set in
 409     self.needed_locks for the level.
 410
 411     @param level: Locking level which is going to be locked
 412     @type level: member of ganeti.locking.LEVELS
 413
 414     """
 415
 416   def CheckPrereq(self):
 417     """Check prerequisites for this LU.
 418
 419     This method should check that the prerequisites for the execution
 420     of this LU are fulfilled. It can do internode communication, but
 421     it should be idempotent - no cluster or system changes are
 422     allowed.
 423
 424     The method should raise errors.OpPrereqError in case something is
 425     not fulfilled. Its return value is ignored.
 426
 427     This method should also update all the parameters of the opcode to
 428     their canonical form if it hasn't been done by ExpandNames before.
 429
 430     """
 431     if self.tasklets is not None:
 432       for (idx, tl) in enumerate(self.tasklets):
 433         logging.debug("Checking prerequisites for tasklet %s/%s",
 434                       idx + 1, len(self.tasklets))
 435         tl.CheckPrereq()
 436     else:
 437       pass
 438
 439   def Exec(self, feedback_fn):
 440     """Execute the LU.
 441
 442     This method should implement the actual work. It should raise
 443     errors.OpExecError for failures that are somewhat dealt with in
 444     code, or expected.
 445
 446     """
 447     if self.tasklets is not None:
 448       for (idx, tl) in enumerate(self.tasklets):
 449         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 450         tl.Exec(feedback_fn)
 451     else:
 452       raise NotImplementedError
 453
 454   def BuildHooksEnv(self):
 455     """Build hooks environment for this LU.
 456
 457     This method should return a three-node tuple consisting of: a dict
 458     containing the environment that will be used for running the
 459     specific hook for this LU, a list of node names on which the hook
 460     should run before the execution, and a list of node names on which
 461     the hook should run after the execution.
 462
 463     The keys of the dict must not have 'GANETI_' prefixed as this will
 464     be handled in the hooks runner. Also note additional keys will be
 465     added by the hooks runner. If the LU doesn't define any
 466     environment, an empty dict (and not None) should be returned.
 467
 468     No nodes should be returned as an empty list (and not None).
 469
 470     Note that if the HPATH for a LU class is None, this function will
 471     not be called.
 472
 473     """
 474     raise NotImplementedError
 475
 476   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 477     """Notify the LU about the results of its hooks.
 478
 479     This method is called every time a hooks phase is executed, and notifies
 480     the Logical Unit about the hooks' result. The LU can then use it to alter
 481     its result based on the hooks.  By default the method does nothing and the
 482     previous result is passed back unchanged but any LU can define it if it
 483     wants to use the local cluster hook-scripts somehow.
 484
 485     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 486         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 487     @param hook_results: the results of the multi-node hooks rpc call
 488     @param feedback_fn: function used send feedback back to the caller
 489     @param lu_result: the previous Exec result this LU had, or None
 490         in the PRE phase
 491     @return: the new Exec result, based on the previous result
 492         and hook results
 493
 494     """
 495     # API must be kept, thus we ignore the unused argument and could
 496     # be a function warnings
 497     # pylint: disable-msg=W0613,R0201
 498     return lu_result
 499
 500   def _ExpandAndLockInstance(self):
 501     """Helper function to expand and lock an instance.
 502
 503     Many LUs that work on an instance take its name in self.op.instance_name
 504     and need to expand it and then declare the expanded name for locking. This
 505     function does it, and then updates self.op.instance_name to the expanded
 506     name. It also initializes needed_locks as a dict, if this hasn't been done
 507     before.
 508
 509     """
 510     if self.needed_locks is None:
 511       self.needed_locks = {}
 512     else:
 513       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 514         "_ExpandAndLockInstance called with instance-level locks set"
 515     self.op.instance_name = _ExpandInstanceName(self.cfg,
 516                                                 self.op.instance_name)
 517     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 518
 519   def _LockInstancesNodes(self, primary_only=False):
 520     """Helper function to declare instances' nodes for locking.
 521
 522     This function should be called after locking one or more instances to lock
 523     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 524     with all primary or secondary nodes for instances already locked and
 525     present in self.needed_locks[locking.LEVEL_INSTANCE].
 526
 527     It should be called from DeclareLocks, and for safety only works if
 528     self.recalculate_locks[locking.LEVEL_NODE] is set.
 529
 530     In the future it may grow parameters to just lock some instance's nodes, or
 531     to just lock primaries or secondary nodes, if needed.
 532
 533     If should be called in DeclareLocks in a way similar to::
 534
 535       if level == locking.LEVEL_NODE:
 536         self._LockInstancesNodes()
 537
 538     @type primary_only: boolean
 539     @param primary_only: only lock primary nodes of locked instances
 540
 541     """
 542     assert locking.LEVEL_NODE in self.recalculate_locks, \
 543       "_LockInstancesNodes helper function called with no nodes to recalculate"
 544
 545     # TODO: check if we're really been called with the instance locks held
 546
 547     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 548     # future we might want to have different behaviors depending on the value
 549     # of self.recalculate_locks[locking.LEVEL_NODE]
 550     wanted_nodes = []
 551     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 552       instance = self.context.cfg.GetInstanceInfo(instance_name)
 553       wanted_nodes.append(instance.primary_node)
 554       if not primary_only:
 555         wanted_nodes.extend(instance.secondary_nodes)
 556
 557     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 558       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 559     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 560       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 561
 562     del self.recalculate_locks[locking.LEVEL_NODE]
 563
 564
 565 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 566   """Simple LU which runs no hooks.
 567
 568   This LU is intended as a parent for other LogicalUnits which will
 569   run no hooks, in order to reduce duplicate code.
 570
 571   """
 572   HPATH = None
 573   HTYPE = None
 574
 575   def BuildHooksEnv(self):
 576     """Empty BuildHooksEnv for NoHooksLu.
 577
 578     This just raises an error.
 579
 580     """
 581     assert False, "BuildHooksEnv called for NoHooksLUs"
 582
 583
 584 class Tasklet:
 585   """Tasklet base class.
 586
 587   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 588   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 589   tasklets know nothing about locks.
 590
 591   Subclasses must follow these rules:
 592     - Implement CheckPrereq
 593     - Implement Exec
 594
 595   """
 596   def __init__(self, lu):
 597     self.lu = lu
 598
 599     # Shortcuts
 600     self.cfg = lu.cfg
 601     self.rpc = lu.rpc
 602
 603   def CheckPrereq(self):
 604     """Check prerequisites for this tasklets.
 605
 606     This method should check whether the prerequisites for the execution of
 607     this tasklet are fulfilled. It can do internode communication, but it
 608     should be idempotent - no cluster or system changes are allowed.
 609
 610     The method should raise errors.OpPrereqError in case something is not
 611     fulfilled. Its return value is ignored.
 612
 613     This method should also update all parameters to their canonical form if it
 614     hasn't been done before.
 615
 616     """
 617     pass
 618
 619   def Exec(self, feedback_fn):
 620     """Execute the tasklet.
 621
 622     This method should implement the actual work. It should raise
 623     errors.OpExecError for failures that are somewhat dealt with in code, or
 624     expected.
 625
 626     """
 627     raise NotImplementedError
 628
 629
 630 def _GetWantedNodes(lu, nodes):
 631   """Returns list of checked and expanded node names.
 632
 633   @type lu: L{LogicalUnit}
 634   @param lu: the logical unit on whose behalf we execute
 635   @type nodes: list
 636   @param nodes: list of node names or None for all nodes
 637   @rtype: list
 638   @return: the list of nodes, sorted
 639   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 640
 641   """
 642   if not nodes:
 643     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 644       " non-empty list of nodes whose name is to be expanded.")
 645
 646   wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
 647   return utils.NiceSort(wanted)
 648
 649
 650 def _GetWantedInstances(lu, instances):
 651   """Returns list of checked and expanded instance names.
 652
 653   @type lu: L{LogicalUnit}
 654   @param lu: the logical unit on whose behalf we execute
 655   @type instances: list
 656   @param instances: list of instance names or None for all instances
 657   @rtype: list
 658   @return: the list of instances, sorted
 659   @raise errors.OpPrereqError: if the instances parameter is wrong type
 660   @raise errors.OpPrereqError: if any of the passed instances is not found
 661
 662   """
 663   if instances:
 664     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 665   else:
 666     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 667   return wanted
 668
 669
 670 def _GetUpdatedParams(old_params, update_dict,
 671                       use_default=True, use_none=False):
 672   """Return the new version of a parameter dictionary.
 673
 674   @type old_params: dict
 675   @param old_params: old parameters
 676   @type update_dict: dict
 677   @param update_dict: dict containing new parameter values, or
 678       constants.VALUE_DEFAULT to reset the parameter to its default
 679       value
 680   @param use_default: boolean
 681   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 682       values as 'to be deleted' values
 683   @param use_none: boolean
 684   @type use_none: whether to recognise C{None} values as 'to be
 685       deleted' values
 686   @rtype: dict
 687   @return: the new parameter dictionary
 688
 689   """
 690   params_copy = copy.deepcopy(old_params)
 691   for key, val in update_dict.iteritems():
 692     if ((use_default and val == constants.VALUE_DEFAULT) or
 693         (use_none and val is None)):
 694       try:
 695         del params_copy[key]
 696       except KeyError:
 697         pass
 698     else:
 699       params_copy[key] = val
 700   return params_copy
 701
 702
 703 def _CheckOutputFields(static, dynamic, selected):
 704   """Checks whether all selected fields are valid.
 705
 706   @type static: L{utils.FieldSet}
 707   @param static: static fields set
 708   @type dynamic: L{utils.FieldSet}
 709   @param dynamic: dynamic fields set
 710
 711   """
 712   f = utils.FieldSet()
 713   f.Extend(static)
 714   f.Extend(dynamic)
 715
 716   delta = f.NonMatching(selected)
 717   if delta:
 718     raise errors.OpPrereqError("Unknown output fields selected: %s"
 719                                % ",".join(delta), errors.ECODE_INVAL)
 720
 721
 722 def _CheckBooleanOpField(op, name):
 723   """Validates boolean opcode parameters.
 724
 725   This will ensure that an opcode parameter is either a boolean value,
 726   or None (but that it always exists).
 727
 728   """
 729   val = getattr(op, name, None)
 730   if not (val is None or isinstance(val, bool)):
 731     raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
 732                                (name, str(val)), errors.ECODE_INVAL)
 733   setattr(op, name, val)
 734
 735
 736 def _CheckGlobalHvParams(params):
 737   """Validates that given hypervisor params are not global ones.
 738
 739   This will ensure that instances don't get customised versions of
 740   global params.
 741
 742   """
 743   used_globals = constants.HVC_GLOBALS.intersection(params)
 744   if used_globals:
 745     msg = ("The following hypervisor parameters are global and cannot"
 746            " be customized at instance level, please modify them at"
 747            " cluster level: %s" % utils.CommaJoin(used_globals))
 748     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 749
 750
 751 def _CheckNodeOnline(lu, node):
 752   """Ensure that a given node is online.
 753
 754   @param lu: the LU on behalf of which we make the check
 755   @param node: the node to check
 756   @raise errors.OpPrereqError: if the node is offline
 757
 758   """
 759   if lu.cfg.GetNodeInfo(node).offline:
 760     raise errors.OpPrereqError("Can't use offline node %s" % node,
 761                                errors.ECODE_INVAL)
 762
 763
 764 def _CheckNodeNotDrained(lu, node):
 765   """Ensure that a given node is not drained.
 766
 767   @param lu: the LU on behalf of which we make the check
 768   @param node: the node to check
 769   @raise errors.OpPrereqError: if the node is drained
 770
 771   """
 772   if lu.cfg.GetNodeInfo(node).drained:
 773     raise errors.OpPrereqError("Can't use drained node %s" % node,
 774                                errors.ECODE_INVAL)
 775
 776
 777 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 778   """Ensure that a node supports a given OS.
 779
 780   @param lu: the LU on behalf of which we make the check
 781   @param node: the node to check
 782   @param os_name: the OS to query about
 783   @param force_variant: whether to ignore variant errors
 784   @raise errors.OpPrereqError: if the node is not supporting the OS
 785
 786   """
 787   result = lu.rpc.call_os_get(node, os_name)
 788   result.Raise("OS '%s' not in supported OS list for node %s" %
 789                (os_name, node),
 790                prereq=True, ecode=errors.ECODE_INVAL)
 791   if not force_variant:
 792     _CheckOSVariant(result.payload, os_name)
 793
 794
 795 def _RequireFileStorage():
 796   """Checks that file storage is enabled.
 797
 798   @raise errors.OpPrereqError: when file storage is disabled
 799
 800   """
 801   if not constants.ENABLE_FILE_STORAGE:
 802     raise errors.OpPrereqError("File storage disabled at configure time",
 803                                errors.ECODE_INVAL)
 804
 805
 806 def _CheckDiskTemplate(template):
 807   """Ensure a given disk template is valid.
 808
 809   """
 810   if template not in constants.DISK_TEMPLATES:
 811     msg = ("Invalid disk template name '%s', valid templates are: %s" %
 812            (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
 813     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 814   if template == constants.DT_FILE:
 815     _RequireFileStorage()
 816   return True
 817
 818
 819 def _CheckStorageType(storage_type):
 820   """Ensure a given storage type is valid.
 821
 822   """
 823   if storage_type not in constants.VALID_STORAGE_TYPES:
 824     raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
 825                                errors.ECODE_INVAL)
 826   if storage_type == constants.ST_FILE:
 827     _RequireFileStorage()
 828   return True
 829
 830
 831 def _GetClusterDomainSecret():
 832   """Reads the cluster domain secret.
 833
 834   """
 835   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 836                                strict=True)
 837
 838
 839 def _CheckInstanceDown(lu, instance, reason):
 840   """Ensure that an instance is not running."""
 841   if instance.admin_up:
 842     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 843                                (instance.name, reason), errors.ECODE_STATE)
 844
 845   pnode = instance.primary_node
 846   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 847   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 848               prereq=True, ecode=errors.ECODE_ENVIRON)
 849
 850   if instance.name in ins_l.payload:
 851     raise errors.OpPrereqError("Instance %s is running, %s" %
 852                                (instance.name, reason), errors.ECODE_STATE)
 853
 854
 855 def _ExpandItemName(fn, name, kind):
 856   """Expand an item name.
 857
 858   @param fn: the function to use for expansion
 859   @param name: requested item name
 860   @param kind: text description ('Node' or 'Instance')
 861   @return: the resolved (full) name
 862   @raise errors.OpPrereqError: if the item is not found
 863
 864   """
 865   full_name = fn(name)
 866   if full_name is None:
 867     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 868                                errors.ECODE_NOENT)
 869   return full_name
 870
 871
 872 def _ExpandNodeName(cfg, name):
 873   """Wrapper over L{_ExpandItemName} for nodes."""
 874   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 875
 876
 877 def _ExpandInstanceName(cfg, name):
 878   """Wrapper over L{_ExpandItemName} for instance."""
 879   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 880
 881
 882 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 883                           memory, vcpus, nics, disk_template, disks,
 884                           bep, hvp, hypervisor_name):
 885   """Builds instance related env variables for hooks
 886
 887   This builds the hook environment from individual variables.
 888
 889   @type name: string
 890   @param name: the name of the instance
 891   @type primary_node: string
 892   @param primary_node: the name of the instance's primary node
 893   @type secondary_nodes: list
 894   @param secondary_nodes: list of secondary nodes as strings
 895   @type os_type: string
 896   @param os_type: the name of the instance's OS
 897   @type status: boolean
 898   @param status: the should_run status of the instance
 899   @type memory: string
 900   @param memory: the memory size of the instance
 901   @type vcpus: string
 902   @param vcpus: the count of VCPUs the instance has
 903   @type nics: list
 904   @param nics: list of tuples (ip, mac, mode, link) representing
 905       the NICs the instance has
 906   @type disk_template: string
 907   @param disk_template: the disk template of the instance
 908   @type disks: list
 909   @param disks: the list of (size, mode) pairs
 910   @type bep: dict
 911   @param bep: the backend parameters for the instance
 912   @type hvp: dict
 913   @param hvp: the hypervisor parameters for the instance
 914   @type hypervisor_name: string
 915   @param hypervisor_name: the hypervisor for the instance
 916   @rtype: dict
 917   @return: the hook environment for this instance
 918
 919   """
 920   if status:
 921     str_status = "up"
 922   else:
 923     str_status = "down"
 924   env = {
 925     "OP_TARGET": name,
 926     "INSTANCE_NAME": name,
 927     "INSTANCE_PRIMARY": primary_node,
 928     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 929     "INSTANCE_OS_TYPE": os_type,
 930     "INSTANCE_STATUS": str_status,
 931     "INSTANCE_MEMORY": memory,
 932     "INSTANCE_VCPUS": vcpus,
 933     "INSTANCE_DISK_TEMPLATE": disk_template,
 934     "INSTANCE_HYPERVISOR": hypervisor_name,
 935   }
 936
 937   if nics:
 938     nic_count = len(nics)
 939     for idx, (ip, mac, mode, link) in enumerate(nics):
 940       if ip is None:
 941         ip = ""
 942       env["INSTANCE_NIC%d_IP" % idx] = ip
 943       env["INSTANCE_NIC%d_MAC" % idx] = mac
 944       env["INSTANCE_NIC%d_MODE" % idx] = mode
 945       env["INSTANCE_NIC%d_LINK" % idx] = link
 946       if mode == constants.NIC_MODE_BRIDGED:
 947         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 948   else:
 949     nic_count = 0
 950
 951   env["INSTANCE_NIC_COUNT"] = nic_count
 952
 953   if disks:
 954     disk_count = len(disks)
 955     for idx, (size, mode) in enumerate(disks):
 956       env["INSTANCE_DISK%d_SIZE" % idx] = size
 957       env["INSTANCE_DISK%d_MODE" % idx] = mode
 958   else:
 959     disk_count = 0
 960
 961   env["INSTANCE_DISK_COUNT"] = disk_count
 962
 963   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 964     for key, value in source.items():
 965       env["INSTANCE_%s_%s" % (kind, key)] = value
 966
 967   return env
 968
 969
 970 def _NICListToTuple(lu, nics):
 971   """Build a list of nic information tuples.
 972
 973   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 974   value in LUQueryInstanceData.
 975
 976   @type lu:  L{LogicalUnit}
 977   @param lu: the logical unit on whose behalf we execute
 978   @type nics: list of L{objects.NIC}
 979   @param nics: list of nics to convert to hooks tuples
 980
 981   """
 982   hooks_nics = []
 983   cluster = lu.cfg.GetClusterInfo()
 984   for nic in nics:
 985     ip = nic.ip
 986     mac = nic.mac
 987     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 988     mode = filled_params[constants.NIC_MODE]
 989     link = filled_params[constants.NIC_LINK]
 990     hooks_nics.append((ip, mac, mode, link))
 991   return hooks_nics
 992
 993
 994 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 995   """Builds instance related env variables for hooks from an object.
 996
 997   @type lu: L{LogicalUnit}
 998   @param lu: the logical unit on whose behalf we execute
 999   @type instance: L{objects.Instance}
1000   @param instance: the instance for which we should build the
1001       environment
1002   @type override: dict
1003   @param override: dictionary with key/values that will override
1004       our values
1005   @rtype: dict
1006   @return: the hook environment dictionary
1007
1008   """
1009   cluster = lu.cfg.GetClusterInfo()
1010   bep = cluster.FillBE(instance)
1011   hvp = cluster.FillHV(instance)
1012   args = {
1013     'name': instance.name,
1014     'primary_node': instance.primary_node,
1015     'secondary_nodes': instance.secondary_nodes,
1016     'os_type': instance.os,
1017     'status': instance.admin_up,
1018     'memory': bep[constants.BE_MEMORY],
1019     'vcpus': bep[constants.BE_VCPUS],
1020     'nics': _NICListToTuple(lu, instance.nics),
1021     'disk_template': instance.disk_template,
1022     'disks': [(disk.size, disk.mode) for disk in instance.disks],
1023     'bep': bep,
1024     'hvp': hvp,
1025     'hypervisor_name': instance.hypervisor,
1026   }
1027   if override:
1028     args.update(override)
1029   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1030
1031
1032 def _AdjustCandidatePool(lu, exceptions):
1033   """Adjust the candidate pool after node operations.
1034
1035   """
1036   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1037   if mod_list:
1038     lu.LogInfo("Promoted nodes to master candidate role: %s",
1039                utils.CommaJoin(node.name for node in mod_list))
1040     for name in mod_list:
1041       lu.context.ReaddNode(name)
1042   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1043   if mc_now > mc_max:
1044     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1045                (mc_now, mc_max))
1046
1047
1048 def _DecideSelfPromotion(lu, exceptions=None):
1049   """Decide whether I should promote myself as a master candidate.
1050
1051   """
1052   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1053   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1054   # the new node will increase mc_max with one, so:
1055   mc_should = min(mc_should + 1, cp_size)
1056   return mc_now < mc_should
1057
1058
1059 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1060   """Check that the brigdes needed by a list of nics exist.
1061
1062   """
1063   cluster = lu.cfg.GetClusterInfo()
1064   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1065   brlist = [params[constants.NIC_LINK] for params in paramslist
1066             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1067   if brlist:
1068     result = lu.rpc.call_bridges_exist(target_node, brlist)
1069     result.Raise("Error checking bridges on destination node '%s'" %
1070                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1071
1072
1073 def _CheckInstanceBridgesExist(lu, instance, node=None):
1074   """Check that the brigdes needed by an instance exist.
1075
1076   """
1077   if node is None:
1078     node = instance.primary_node
1079   _CheckNicsBridgesExist(lu, instance.nics, node)
1080
1081
1082 def _CheckOSVariant(os_obj, name):
1083   """Check whether an OS name conforms to the os variants specification.
1084
1085   @type os_obj: L{objects.OS}
1086   @param os_obj: OS object to check
1087   @type name: string
1088   @param name: OS name passed by the user, to check for validity
1089
1090   """
1091   if not os_obj.supported_variants:
1092     return
1093   try:
1094     variant = name.split("+", 1)[1]
1095   except IndexError:
1096     raise errors.OpPrereqError("OS name must include a variant",
1097                                errors.ECODE_INVAL)
1098
1099   if variant not in os_obj.supported_variants:
1100     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1101
1102
1103 def _GetNodeInstancesInner(cfg, fn):
1104   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1105
1106
1107 def _GetNodeInstances(cfg, node_name):
1108   """Returns a list of all primary and secondary instances on a node.
1109
1110   """
1111
1112   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1113
1114
1115 def _GetNodePrimaryInstances(cfg, node_name):
1116   """Returns primary instances on a node.
1117
1118   """
1119   return _GetNodeInstancesInner(cfg,
1120                                 lambda inst: node_name == inst.primary_node)
1121
1122
1123 def _GetNodeSecondaryInstances(cfg, node_name):
1124   """Returns secondary instances on a node.
1125
1126   """
1127   return _GetNodeInstancesInner(cfg,
1128                                 lambda inst: node_name in inst.secondary_nodes)
1129
1130
1131 def _GetStorageTypeArgs(cfg, storage_type):
1132   """Returns the arguments for a storage type.
1133
1134   """
1135   # Special case for file storage
1136   if storage_type == constants.ST_FILE:
1137     # storage.FileStorage wants a list of storage directories
1138     return [[cfg.GetFileStorageDir()]]
1139
1140   return []
1141
1142
1143 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1144   faulty = []
1145
1146   for dev in instance.disks:
1147     cfg.SetDiskID(dev, node_name)
1148
1149   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1150   result.Raise("Failed to get disk status from node %s" % node_name,
1151                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1152
1153   for idx, bdev_status in enumerate(result.payload):
1154     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1155       faulty.append(idx)
1156
1157   return faulty
1158
1159
1160 class LUPostInitCluster(LogicalUnit):
1161   """Logical unit for running hooks after cluster initialization.
1162
1163   """
1164   HPATH = "cluster-init"
1165   HTYPE = constants.HTYPE_CLUSTER
1166
1167   def BuildHooksEnv(self):
1168     """Build hooks env.
1169
1170     """
1171     env = {"OP_TARGET": self.cfg.GetClusterName()}
1172     mn = self.cfg.GetMasterNode()
1173     return env, [], [mn]
1174
1175   def Exec(self, feedback_fn):
1176     """Nothing to do.
1177
1178     """
1179     return True
1180
1181
1182 class LUDestroyCluster(LogicalUnit):
1183   """Logical unit for destroying the cluster.
1184
1185   """
1186   HPATH = "cluster-destroy"
1187   HTYPE = constants.HTYPE_CLUSTER
1188
1189   def BuildHooksEnv(self):
1190     """Build hooks env.
1191
1192     """
1193     env = {"OP_TARGET": self.cfg.GetClusterName()}
1194     return env, [], []
1195
1196   def CheckPrereq(self):
1197     """Check prerequisites.
1198
1199     This checks whether the cluster is empty.
1200
1201     Any errors are signaled by raising errors.OpPrereqError.
1202
1203     """
1204     master = self.cfg.GetMasterNode()
1205
1206     nodelist = self.cfg.GetNodeList()
1207     if len(nodelist) != 1 or nodelist[0] != master:
1208       raise errors.OpPrereqError("There are still %d node(s) in"
1209                                  " this cluster." % (len(nodelist) - 1),
1210                                  errors.ECODE_INVAL)
1211     instancelist = self.cfg.GetInstanceList()
1212     if instancelist:
1213       raise errors.OpPrereqError("There are still %d instance(s) in"
1214                                  " this cluster." % len(instancelist),
1215                                  errors.ECODE_INVAL)
1216
1217   def Exec(self, feedback_fn):
1218     """Destroys the cluster.
1219
1220     """
1221     master = self.cfg.GetMasterNode()
1222     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1223
1224     # Run post hooks on master node before it's removed
1225     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1226     try:
1227       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1228     except:
1229       # pylint: disable-msg=W0702
1230       self.LogWarning("Errors occurred running hooks on %s" % master)
1231
1232     result = self.rpc.call_node_stop_master(master, False)
1233     result.Raise("Could not disable the master role")
1234
1235     if modify_ssh_setup:
1236       priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1237       utils.CreateBackup(priv_key)
1238       utils.CreateBackup(pub_key)
1239
1240     return master
1241
1242
1243 def _VerifyCertificate(filename):
1244   """Verifies a certificate for LUVerifyCluster.
1245
1246   @type filename: string
1247   @param filename: Path to PEM file
1248
1249   """
1250   try:
1251     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1252                                            utils.ReadFile(filename))
1253   except Exception, err: # pylint: disable-msg=W0703
1254     return (LUVerifyCluster.ETYPE_ERROR,
1255             "Failed to load X509 certificate %s: %s" % (filename, err))
1256
1257   (errcode, msg) = \
1258     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1259                                 constants.SSL_CERT_EXPIRATION_ERROR)
1260
1261   if msg:
1262     fnamemsg = "While verifying %s: %s" % (filename, msg)
1263   else:
1264     fnamemsg = None
1265
1266   if errcode is None:
1267     return (None, fnamemsg)
1268   elif errcode == utils.CERT_WARNING:
1269     return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1270   elif errcode == utils.CERT_ERROR:
1271     return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1272
1273   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1274
1275
1276 class LUVerifyCluster(LogicalUnit):
1277   """Verifies the cluster status.
1278
1279   """
1280   HPATH = "cluster-verify"
1281   HTYPE = constants.HTYPE_CLUSTER
1282   _OP_PARAMS = [
1283     ("skip_checks", _EmptyList,
1284      _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1285     ("verbose", False, _TBool),
1286     ("error_codes", False, _TBool),
1287     ("debug_simulate_errors", False, _TBool),
1288     ]
1289   REQ_BGL = False
1290
1291   TCLUSTER = "cluster"
1292   TNODE = "node"
1293   TINSTANCE = "instance"
1294
1295   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1296   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1297   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1298   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1299   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1300   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1301   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1302   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1303   ENODEDRBD = (TNODE, "ENODEDRBD")
1304   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1305   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1306   ENODEHV = (TNODE, "ENODEHV")
1307   ENODELVM = (TNODE, "ENODELVM")
1308   ENODEN1 = (TNODE, "ENODEN1")
1309   ENODENET = (TNODE, "ENODENET")
1310   ENODEOS = (TNODE, "ENODEOS")
1311   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1312   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1313   ENODERPC = (TNODE, "ENODERPC")
1314   ENODESSH = (TNODE, "ENODESSH")
1315   ENODEVERSION = (TNODE, "ENODEVERSION")
1316   ENODESETUP = (TNODE, "ENODESETUP")
1317   ENODETIME = (TNODE, "ENODETIME")
1318
1319   ETYPE_FIELD = "code"
1320   ETYPE_ERROR = "ERROR"
1321   ETYPE_WARNING = "WARNING"
1322
1323   class NodeImage(object):
1324     """A class representing the logical and physical status of a node.
1325
1326     @type name: string
1327     @ivar name: the node name to which this object refers
1328     @ivar volumes: a structure as returned from
1329         L{ganeti.backend.GetVolumeList} (runtime)
1330     @ivar instances: a list of running instances (runtime)
1331     @ivar pinst: list of configured primary instances (config)
1332     @ivar sinst: list of configured secondary instances (config)
1333     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1334         of this node (config)
1335     @ivar mfree: free memory, as reported by hypervisor (runtime)
1336     @ivar dfree: free disk, as reported by the node (runtime)
1337     @ivar offline: the offline status (config)
1338     @type rpc_fail: boolean
1339     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1340         not whether the individual keys were correct) (runtime)
1341     @type lvm_fail: boolean
1342     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1343     @type hyp_fail: boolean
1344     @ivar hyp_fail: whether the RPC call didn't return the instance list
1345     @type ghost: boolean
1346     @ivar ghost: whether this is a known node or not (config)
1347     @type os_fail: boolean
1348     @ivar os_fail: whether the RPC call didn't return valid OS data
1349     @type oslist: list
1350     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1351
1352     """
1353     def __init__(self, offline=False, name=None):
1354       self.name = name
1355       self.volumes = {}
1356       self.instances = []
1357       self.pinst = []
1358       self.sinst = []
1359       self.sbp = {}
1360       self.mfree = 0
1361       self.dfree = 0
1362       self.offline = offline
1363       self.rpc_fail = False
1364       self.lvm_fail = False
1365       self.hyp_fail = False
1366       self.ghost = False
1367       self.os_fail = False
1368       self.oslist = {}
1369
1370   def ExpandNames(self):
1371     self.needed_locks = {
1372       locking.LEVEL_NODE: locking.ALL_SET,
1373       locking.LEVEL_INSTANCE: locking.ALL_SET,
1374     }
1375     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1376
1377   def _Error(self, ecode, item, msg, *args, **kwargs):
1378     """Format an error message.
1379
1380     Based on the opcode's error_codes parameter, either format a
1381     parseable error code, or a simpler error string.
1382
1383     This must be called only from Exec and functions called from Exec.
1384
1385     """
1386     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1387     itype, etxt = ecode
1388     # first complete the msg
1389     if args:
1390       msg = msg % args
1391     # then format the whole message
1392     if self.op.error_codes:
1393       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1394     else:
1395       if item:
1396         item = " " + item
1397       else:
1398         item = ""
1399       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1400     # and finally report it via the feedback_fn
1401     self._feedback_fn("  - %s" % msg)
1402
1403   def _ErrorIf(self, cond, *args, **kwargs):
1404     """Log an error message if the passed condition is True.
1405
1406     """
1407     cond = bool(cond) or self.op.debug_simulate_errors
1408     if cond:
1409       self._Error(*args, **kwargs)
1410     # do not mark the operation as failed for WARN cases only
1411     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1412       self.bad = self.bad or cond
1413
1414   def _VerifyNode(self, ninfo, nresult):
1415     """Run multiple tests against a node.
1416
1417     Test list:
1418
1419       - compares ganeti version
1420       - checks vg existence and size > 20G
1421       - checks config file checksum
1422       - checks ssh to other nodes
1423
1424     @type ninfo: L{objects.Node}
1425     @param ninfo: the node to check
1426     @param nresult: the results from the node
1427     @rtype: boolean
1428     @return: whether overall this call was successful (and we can expect
1429          reasonable values in the respose)
1430
1431     """
1432     node = ninfo.name
1433     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1434
1435     # main result, nresult should be a non-empty dict
1436     test = not nresult or not isinstance(nresult, dict)
1437     _ErrorIf(test, self.ENODERPC, node,
1438                   "unable to verify node: no data returned")
1439     if test:
1440       return False
1441
1442     # compares ganeti version
1443     local_version = constants.PROTOCOL_VERSION
1444     remote_version = nresult.get("version", None)
1445     test = not (remote_version and
1446                 isinstance(remote_version, (list, tuple)) and
1447                 len(remote_version) == 2)
1448     _ErrorIf(test, self.ENODERPC, node,
1449              "connection to node returned invalid data")
1450     if test:
1451       return False
1452
1453     test = local_version != remote_version[0]
1454     _ErrorIf(test, self.ENODEVERSION, node,
1455              "incompatible protocol versions: master %s,"
1456              " node %s", local_version, remote_version[0])
1457     if test:
1458       return False
1459
1460     # node seems compatible, we can actually try to look into its results
1461
1462     # full package version
1463     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1464                   self.ENODEVERSION, node,
1465                   "software version mismatch: master %s, node %s",
1466                   constants.RELEASE_VERSION, remote_version[1],
1467                   code=self.ETYPE_WARNING)
1468
1469     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1470     if isinstance(hyp_result, dict):
1471       for hv_name, hv_result in hyp_result.iteritems():
1472         test = hv_result is not None
1473         _ErrorIf(test, self.ENODEHV, node,
1474                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1475
1476
1477     test = nresult.get(constants.NV_NODESETUP,
1478                            ["Missing NODESETUP results"])
1479     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1480              "; ".join(test))
1481
1482     return True
1483
1484   def _VerifyNodeTime(self, ninfo, nresult,
1485                       nvinfo_starttime, nvinfo_endtime):
1486     """Check the node time.
1487
1488     @type ninfo: L{objects.Node}
1489     @param ninfo: the node to check
1490     @param nresult: the remote results for the node
1491     @param nvinfo_starttime: the start time of the RPC call
1492     @param nvinfo_endtime: the end time of the RPC call
1493
1494     """
1495     node = ninfo.name
1496     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1497
1498     ntime = nresult.get(constants.NV_TIME, None)
1499     try:
1500       ntime_merged = utils.MergeTime(ntime)
1501     except (ValueError, TypeError):
1502       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1503       return
1504
1505     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1506       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1507     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1508       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1509     else:
1510       ntime_diff = None
1511
1512     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1513              "Node time diverges by at least %s from master node time",
1514              ntime_diff)
1515
1516   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1517     """Check the node time.
1518
1519     @type ninfo: L{objects.Node}
1520     @param ninfo: the node to check
1521     @param nresult: the remote results for the node
1522     @param vg_name: the configured VG name
1523
1524     """
1525     if vg_name is None:
1526       return
1527
1528     node = ninfo.name
1529     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1530
1531     # checks vg existence and size > 20G
1532     vglist = nresult.get(constants.NV_VGLIST, None)
1533     test = not vglist
1534     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1535     if not test:
1536       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1537                                             constants.MIN_VG_SIZE)
1538       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1539
1540     # check pv names
1541     pvlist = nresult.get(constants.NV_PVLIST, None)
1542     test = pvlist is None
1543     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1544     if not test:
1545       # check that ':' is not present in PV names, since it's a
1546       # special character for lvcreate (denotes the range of PEs to
1547       # use on the PV)
1548       for _, pvname, owner_vg in pvlist:
1549         test = ":" in pvname
1550         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1551                  " '%s' of VG '%s'", pvname, owner_vg)
1552
1553   def _VerifyNodeNetwork(self, ninfo, nresult):
1554     """Check the node time.
1555
1556     @type ninfo: L{objects.Node}
1557     @param ninfo: the node to check
1558     @param nresult: the remote results for the node
1559
1560     """
1561     node = ninfo.name
1562     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1563
1564     test = constants.NV_NODELIST not in nresult
1565     _ErrorIf(test, self.ENODESSH, node,
1566              "node hasn't returned node ssh connectivity data")
1567     if not test:
1568       if nresult[constants.NV_NODELIST]:
1569         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1570           _ErrorIf(True, self.ENODESSH, node,
1571                    "ssh communication with node '%s': %s", a_node, a_msg)
1572
1573     test = constants.NV_NODENETTEST not in nresult
1574     _ErrorIf(test, self.ENODENET, node,
1575              "node hasn't returned node tcp connectivity data")
1576     if not test:
1577       if nresult[constants.NV_NODENETTEST]:
1578         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1579         for anode in nlist:
1580           _ErrorIf(True, self.ENODENET, node,
1581                    "tcp communication with node '%s': %s",
1582                    anode, nresult[constants.NV_NODENETTEST][anode])
1583
1584     test = constants.NV_MASTERIP not in nresult
1585     _ErrorIf(test, self.ENODENET, node,
1586              "node hasn't returned node master IP reachability data")
1587     if not test:
1588       if not nresult[constants.NV_MASTERIP]:
1589         if node == self.master_node:
1590           msg = "the master node cannot reach the master IP (not configured?)"
1591         else:
1592           msg = "cannot reach the master IP"
1593         _ErrorIf(True, self.ENODENET, node, msg)
1594
1595
1596   def _VerifyInstance(self, instance, instanceconfig, node_image):
1597     """Verify an instance.
1598
1599     This function checks to see if the required block devices are
1600     available on the instance's node.
1601
1602     """
1603     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1604     node_current = instanceconfig.primary_node
1605
1606     node_vol_should = {}
1607     instanceconfig.MapLVsByNode(node_vol_should)
1608
1609     for node in node_vol_should:
1610       n_img = node_image[node]
1611       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1612         # ignore missing volumes on offline or broken nodes
1613         continue
1614       for volume in node_vol_should[node]:
1615         test = volume not in n_img.volumes
1616         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1617                  "volume %s missing on node %s", volume, node)
1618
1619     if instanceconfig.admin_up:
1620       pri_img = node_image[node_current]
1621       test = instance not in pri_img.instances and not pri_img.offline
1622       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1623                "instance not running on its primary node %s",
1624                node_current)
1625
1626     for node, n_img in node_image.items():
1627       if (not node == node_current):
1628         test = instance in n_img.instances
1629         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1630                  "instance should not run on node %s", node)
1631
1632   def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1633     """Verify if there are any unknown volumes in the cluster.
1634
1635     The .os, .swap and backup volumes are ignored. All other volumes are
1636     reported as unknown.
1637
1638     """
1639     for node, n_img in node_image.items():
1640       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1641         # skip non-healthy nodes
1642         continue
1643       for volume in n_img.volumes:
1644         test = (node not in node_vol_should or
1645                 volume not in node_vol_should[node])
1646         self._ErrorIf(test, self.ENODEORPHANLV, node,
1647                       "volume %s is unknown", volume)
1648
1649   def _VerifyOrphanInstances(self, instancelist, node_image):
1650     """Verify the list of running instances.
1651
1652     This checks what instances are running but unknown to the cluster.
1653
1654     """
1655     for node, n_img in node_image.items():
1656       for o_inst in n_img.instances:
1657         test = o_inst not in instancelist
1658         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1659                       "instance %s on node %s should not exist", o_inst, node)
1660
1661   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1662     """Verify N+1 Memory Resilience.
1663
1664     Check that if one single node dies we can still start all the
1665     instances it was primary for.
1666
1667     """
1668     for node, n_img in node_image.items():
1669       # This code checks that every node which is now listed as
1670       # secondary has enough memory to host all instances it is
1671       # supposed to should a single other node in the cluster fail.
1672       # FIXME: not ready for failover to an arbitrary node
1673       # FIXME: does not support file-backed instances
1674       # WARNING: we currently take into account down instances as well
1675       # as up ones, considering that even if they're down someone
1676       # might want to start them even in the event of a node failure.
1677       for prinode, instances in n_img.sbp.items():
1678         needed_mem = 0
1679         for instance in instances:
1680           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1681           if bep[constants.BE_AUTO_BALANCE]:
1682             needed_mem += bep[constants.BE_MEMORY]
1683         test = n_img.mfree < needed_mem
1684         self._ErrorIf(test, self.ENODEN1, node,
1685                       "not enough memory on to accommodate"
1686                       " failovers should peer node %s fail", prinode)
1687
1688   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1689                        master_files):
1690     """Verifies and computes the node required file checksums.
1691
1692     @type ninfo: L{objects.Node}
1693     @param ninfo: the node to check
1694     @param nresult: the remote results for the node
1695     @param file_list: required list of files
1696     @param local_cksum: dictionary of local files and their checksums
1697     @param master_files: list of files that only masters should have
1698
1699     """
1700     node = ninfo.name
1701     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1702
1703     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1704     test = not isinstance(remote_cksum, dict)
1705     _ErrorIf(test, self.ENODEFILECHECK, node,
1706              "node hasn't returned file checksum data")
1707     if test:
1708       return
1709
1710     for file_name in file_list:
1711       node_is_mc = ninfo.master_candidate
1712       must_have = (file_name not in master_files) or node_is_mc
1713       # missing
1714       test1 = file_name not in remote_cksum
1715       # invalid checksum
1716       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1717       # existing and good
1718       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1719       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1720                "file '%s' missing", file_name)
1721       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1722                "file '%s' has wrong checksum", file_name)
1723       # not candidate and this is not a must-have file
1724       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1725                "file '%s' should not exist on non master"
1726                " candidates (and the file is outdated)", file_name)
1727       # all good, except non-master/non-must have combination
1728       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1729                "file '%s' should not exist"
1730                " on non master candidates", file_name)
1731
1732   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1733     """Verifies and the node DRBD status.
1734
1735     @type ninfo: L{objects.Node}
1736     @param ninfo: the node to check
1737     @param nresult: the remote results for the node
1738     @param instanceinfo: the dict of instances
1739     @param drbd_map: the DRBD map as returned by
1740         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1741
1742     """
1743     node = ninfo.name
1744     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1745
1746     # compute the DRBD minors
1747     node_drbd = {}
1748     for minor, instance in drbd_map[node].items():
1749       test = instance not in instanceinfo
1750       _ErrorIf(test, self.ECLUSTERCFG, None,
1751                "ghost instance '%s' in temporary DRBD map", instance)
1752         # ghost instance should not be running, but otherwise we
1753         # don't give double warnings (both ghost instance and
1754         # unallocated minor in use)
1755       if test:
1756         node_drbd[minor] = (instance, False)
1757       else:
1758         instance = instanceinfo[instance]
1759         node_drbd[minor] = (instance.name, instance.admin_up)
1760
1761     # and now check them
1762     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1763     test = not isinstance(used_minors, (tuple, list))
1764     _ErrorIf(test, self.ENODEDRBD, node,
1765              "cannot parse drbd status file: %s", str(used_minors))
1766     if test:
1767       # we cannot check drbd status
1768       return
1769
1770     for minor, (iname, must_exist) in node_drbd.items():
1771       test = minor not in used_minors and must_exist
1772       _ErrorIf(test, self.ENODEDRBD, node,
1773                "drbd minor %d of instance %s is not active", minor, iname)
1774     for minor in used_minors:
1775       test = minor not in node_drbd
1776       _ErrorIf(test, self.ENODEDRBD, node,
1777                "unallocated drbd minor %d is in use", minor)
1778
1779   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1780     """Builds the node OS structures.
1781
1782     @type ninfo: L{objects.Node}
1783     @param ninfo: the node to check
1784     @param nresult: the remote results for the node
1785     @param nimg: the node image object
1786
1787     """
1788     node = ninfo.name
1789     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1790
1791     remote_os = nresult.get(constants.NV_OSLIST, None)
1792     test = (not isinstance(remote_os, list) or
1793             not compat.all(isinstance(v, list) and len(v) == 7
1794                            for v in remote_os))
1795
1796     _ErrorIf(test, self.ENODEOS, node,
1797              "node hasn't returned valid OS data")
1798
1799     nimg.os_fail = test
1800
1801     if test:
1802       return
1803
1804     os_dict = {}
1805
1806     for (name, os_path, status, diagnose,
1807          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1808
1809       if name not in os_dict:
1810         os_dict[name] = []
1811
1812       # parameters is a list of lists instead of list of tuples due to
1813       # JSON lacking a real tuple type, fix it:
1814       parameters = [tuple(v) for v in parameters]
1815       os_dict[name].append((os_path, status, diagnose,
1816                             set(variants), set(parameters), set(api_ver)))
1817
1818     nimg.oslist = os_dict
1819
1820   def _VerifyNodeOS(self, ninfo, nimg, base):
1821     """Verifies the node OS list.
1822
1823     @type ninfo: L{objects.Node}
1824     @param ninfo: the node to check
1825     @param nimg: the node image object
1826     @param base: the 'template' node we match against (e.g. from the master)
1827
1828     """
1829     node = ninfo.name
1830     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1831
1832     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1833
1834     for os_name, os_data in nimg.oslist.items():
1835       assert os_data, "Empty OS status for OS %s?!" % os_name
1836       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1837       _ErrorIf(not f_status, self.ENODEOS, node,
1838                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1839       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1840                "OS '%s' has multiple entries (first one shadows the rest): %s",
1841                os_name, utils.CommaJoin([v[0] for v in os_data]))
1842       # this will catched in backend too
1843       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1844                and not f_var, self.ENODEOS, node,
1845                "OS %s with API at least %d does not declare any variant",
1846                os_name, constants.OS_API_V15)
1847       # comparisons with the 'base' image
1848       test = os_name not in base.oslist
1849       _ErrorIf(test, self.ENODEOS, node,
1850                "Extra OS %s not present on reference node (%s)",
1851                os_name, base.name)
1852       if test:
1853         continue
1854       assert base.oslist[os_name], "Base node has empty OS status?"
1855       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1856       if not b_status:
1857         # base OS is invalid, skipping
1858         continue
1859       for kind, a, b in [("API version", f_api, b_api),
1860                          ("variants list", f_var, b_var),
1861                          ("parameters", f_param, b_param)]:
1862         _ErrorIf(a != b, self.ENODEOS, node,
1863                  "OS %s %s differs from reference node %s: %s vs. %s",
1864                  kind, os_name, base.name,
1865                  utils.CommaJoin(a), utils.CommaJoin(b))
1866
1867     # check any missing OSes
1868     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1869     _ErrorIf(missing, self.ENODEOS, node,
1870              "OSes present on reference node %s but missing on this node: %s",
1871              base.name, utils.CommaJoin(missing))
1872
1873   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1874     """Verifies and updates the node volume data.
1875
1876     This function will update a L{NodeImage}'s internal structures
1877     with data from the remote call.
1878
1879     @type ninfo: L{objects.Node}
1880     @param ninfo: the node to check
1881     @param nresult: the remote results for the node
1882     @param nimg: the node image object
1883     @param vg_name: the configured VG name
1884
1885     """
1886     node = ninfo.name
1887     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1888
1889     nimg.lvm_fail = True
1890     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1891     if vg_name is None:
1892       pass
1893     elif isinstance(lvdata, basestring):
1894       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1895                utils.SafeEncode(lvdata))
1896     elif not isinstance(lvdata, dict):
1897       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1898     else:
1899       nimg.volumes = lvdata
1900       nimg.lvm_fail = False
1901
1902   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1903     """Verifies and updates the node instance list.
1904
1905     If the listing was successful, then updates this node's instance
1906     list. Otherwise, it marks the RPC call as failed for the instance
1907     list key.
1908
1909     @type ninfo: L{objects.Node}
1910     @param ninfo: the node to check
1911     @param nresult: the remote results for the node
1912     @param nimg: the node image object
1913
1914     """
1915     idata = nresult.get(constants.NV_INSTANCELIST, None)
1916     test = not isinstance(idata, list)
1917     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1918                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1919     if test:
1920       nimg.hyp_fail = True
1921     else:
1922       nimg.instances = idata
1923
1924   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1925     """Verifies and computes a node information map
1926
1927     @type ninfo: L{objects.Node}
1928     @param ninfo: the node to check
1929     @param nresult: the remote results for the node
1930     @param nimg: the node image object
1931     @param vg_name: the configured VG name
1932
1933     """
1934     node = ninfo.name
1935     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1936
1937     # try to read free memory (from the hypervisor)
1938     hv_info = nresult.get(constants.NV_HVINFO, None)
1939     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1940     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1941     if not test:
1942       try:
1943         nimg.mfree = int(hv_info["memory_free"])
1944       except (ValueError, TypeError):
1945         _ErrorIf(True, self.ENODERPC, node,
1946                  "node returned invalid nodeinfo, check hypervisor")
1947
1948     # FIXME: devise a free space model for file based instances as well
1949     if vg_name is not None:
1950       test = (constants.NV_VGLIST not in nresult or
1951               vg_name not in nresult[constants.NV_VGLIST])
1952       _ErrorIf(test, self.ENODELVM, node,
1953                "node didn't return data for the volume group '%s'"
1954                " - it is either missing or broken", vg_name)
1955       if not test:
1956         try:
1957           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1958         except (ValueError, TypeError):
1959           _ErrorIf(True, self.ENODERPC, node,
1960                    "node returned invalid LVM info, check LVM status")
1961
1962   def BuildHooksEnv(self):
1963     """Build hooks env.
1964
1965     Cluster-Verify hooks just ran in the post phase and their failure makes
1966     the output be logged in the verify output and the verification to fail.
1967
1968     """
1969     all_nodes = self.cfg.GetNodeList()
1970     env = {
1971       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1972       }
1973     for node in self.cfg.GetAllNodesInfo().values():
1974       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1975
1976     return env, [], all_nodes
1977
1978   def Exec(self, feedback_fn):
1979     """Verify integrity of cluster, performing various test on nodes.
1980
1981     """
1982     self.bad = False
1983     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1984     verbose = self.op.verbose
1985     self._feedback_fn = feedback_fn
1986     feedback_fn("* Verifying global settings")
1987     for msg in self.cfg.VerifyConfig():
1988       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1989
1990     # Check the cluster certificates
1991     for cert_filename in constants.ALL_CERT_FILES:
1992       (errcode, msg) = _VerifyCertificate(cert_filename)
1993       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1994
1995     vg_name = self.cfg.GetVGName()
1996     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1997     cluster = self.cfg.GetClusterInfo()
1998     nodelist = utils.NiceSort(self.cfg.GetNodeList())
1999     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2000     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2001     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2002                         for iname in instancelist)
2003     i_non_redundant = [] # Non redundant instances
2004     i_non_a_balanced = [] # Non auto-balanced instances
2005     n_offline = 0 # Count of offline nodes
2006     n_drained = 0 # Count of nodes being drained
2007     node_vol_should = {}
2008
2009     # FIXME: verify OS list
2010     # do local checksums
2011     master_files = [constants.CLUSTER_CONF_FILE]
2012     master_node = self.master_node = self.cfg.GetMasterNode()
2013     master_ip = self.cfg.GetMasterIP()
2014
2015     file_names = ssconf.SimpleStore().GetFileList()
2016     file_names.extend(constants.ALL_CERT_FILES)
2017     file_names.extend(master_files)
2018     if cluster.modify_etc_hosts:
2019       file_names.append(constants.ETC_HOSTS)
2020
2021     local_checksums = utils.FingerprintFiles(file_names)
2022
2023     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2024     node_verify_param = {
2025       constants.NV_FILELIST: file_names,
2026       constants.NV_NODELIST: [node.name for node in nodeinfo
2027                               if not node.offline],
2028       constants.NV_HYPERVISOR: hypervisors,
2029       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2030                                   node.secondary_ip) for node in nodeinfo
2031                                  if not node.offline],
2032       constants.NV_INSTANCELIST: hypervisors,
2033       constants.NV_VERSION: None,
2034       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2035       constants.NV_NODESETUP: None,
2036       constants.NV_TIME: None,
2037       constants.NV_MASTERIP: (master_node, master_ip),
2038       constants.NV_OSLIST: None,
2039       }
2040
2041     if vg_name is not None:
2042       node_verify_param[constants.NV_VGLIST] = None
2043       node_verify_param[constants.NV_LVLIST] = vg_name
2044       node_verify_param[constants.NV_PVLIST] = [vg_name]
2045       node_verify_param[constants.NV_DRBDLIST] = None
2046
2047     # Build our expected cluster state
2048     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2049                                                  name=node.name))
2050                       for node in nodeinfo)
2051
2052     for instance in instancelist:
2053       inst_config = instanceinfo[instance]
2054
2055       for nname in inst_config.all_nodes:
2056         if nname not in node_image:
2057           # ghost node
2058           gnode = self.NodeImage(name=nname)
2059           gnode.ghost = True
2060           node_image[nname] = gnode
2061
2062       inst_config.MapLVsByNode(node_vol_should)
2063
2064       pnode = inst_config.primary_node
2065       node_image[pnode].pinst.append(instance)
2066
2067       for snode in inst_config.secondary_nodes:
2068         nimg = node_image[snode]
2069         nimg.sinst.append(instance)
2070         if pnode not in nimg.sbp:
2071           nimg.sbp[pnode] = []
2072         nimg.sbp[pnode].append(instance)
2073
2074     # At this point, we have the in-memory data structures complete,
2075     # except for the runtime information, which we'll gather next
2076
2077     # Due to the way our RPC system works, exact response times cannot be
2078     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2079     # time before and after executing the request, we can at least have a time
2080     # window.
2081     nvinfo_starttime = time.time()
2082     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2083                                            self.cfg.GetClusterName())
2084     nvinfo_endtime = time.time()
2085
2086     all_drbd_map = self.cfg.ComputeDRBDMap()
2087
2088     feedback_fn("* Verifying node status")
2089
2090     refos_img = None
2091
2092     for node_i in nodeinfo:
2093       node = node_i.name
2094       nimg = node_image[node]
2095
2096       if node_i.offline:
2097         if verbose:
2098           feedback_fn("* Skipping offline node %s" % (node,))
2099         n_offline += 1
2100         continue
2101
2102       if node == master_node:
2103         ntype = "master"
2104       elif node_i.master_candidate:
2105         ntype = "master candidate"
2106       elif node_i.drained:
2107         ntype = "drained"
2108         n_drained += 1
2109       else:
2110         ntype = "regular"
2111       if verbose:
2112         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2113
2114       msg = all_nvinfo[node].fail_msg
2115       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2116       if msg:
2117         nimg.rpc_fail = True
2118         continue
2119
2120       nresult = all_nvinfo[node].payload
2121
2122       nimg.call_ok = self._VerifyNode(node_i, nresult)
2123       self._VerifyNodeNetwork(node_i, nresult)
2124       self._VerifyNodeLVM(node_i, nresult, vg_name)
2125       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2126                             master_files)
2127       self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
2128       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2129
2130       self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2131       self._UpdateNodeInstances(node_i, nresult, nimg)
2132       self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2133       self._UpdateNodeOS(node_i, nresult, nimg)
2134       if not nimg.os_fail:
2135         if refos_img is None:
2136           refos_img = nimg
2137         self._VerifyNodeOS(node_i, nimg, refos_img)
2138
2139     feedback_fn("* Verifying instance status")
2140     for instance in instancelist:
2141       if verbose:
2142         feedback_fn("* Verifying instance %s" % instance)
2143       inst_config = instanceinfo[instance]
2144       self._VerifyInstance(instance, inst_config, node_image)
2145       inst_nodes_offline = []
2146
2147       pnode = inst_config.primary_node
2148       pnode_img = node_image[pnode]
2149       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2150                self.ENODERPC, pnode, "instance %s, connection to"
2151                " primary node failed", instance)
2152
2153       if pnode_img.offline:
2154         inst_nodes_offline.append(pnode)
2155
2156       # If the instance is non-redundant we cannot survive losing its primary
2157       # node, so we are not N+1 compliant. On the other hand we have no disk
2158       # templates with more than one secondary so that situation is not well
2159       # supported either.
2160       # FIXME: does not support file-backed instances
2161       if not inst_config.secondary_nodes:
2162         i_non_redundant.append(instance)
2163       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2164                instance, "instance has multiple secondary nodes: %s",
2165                utils.CommaJoin(inst_config.secondary_nodes),
2166                code=self.ETYPE_WARNING)
2167
2168       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2169         i_non_a_balanced.append(instance)
2170
2171       for snode in inst_config.secondary_nodes:
2172         s_img = node_image[snode]
2173         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2174                  "instance %s, connection to secondary node failed", instance)
2175
2176         if s_img.offline:
2177           inst_nodes_offline.append(snode)
2178
2179       # warn that the instance lives on offline nodes
2180       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2181                "instance lives on offline node(s) %s",
2182                utils.CommaJoin(inst_nodes_offline))
2183       # ... or ghost nodes
2184       for node in inst_config.all_nodes:
2185         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2186                  "instance lives on ghost node %s", node)
2187
2188     feedback_fn("* Verifying orphan volumes")
2189     self._VerifyOrphanVolumes(node_vol_should, node_image)
2190
2191     feedback_fn("* Verifying orphan instances")
2192     self._VerifyOrphanInstances(instancelist, node_image)
2193
2194     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2195       feedback_fn("* Verifying N+1 Memory redundancy")
2196       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2197
2198     feedback_fn("* Other Notes")
2199     if i_non_redundant:
2200       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2201                   % len(i_non_redundant))
2202
2203     if i_non_a_balanced:
2204       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2205                   % len(i_non_a_balanced))
2206
2207     if n_offline:
2208       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2209
2210     if n_drained:
2211       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2212
2213     return not self.bad
2214
2215   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2216     """Analyze the post-hooks' result
2217
2218     This method analyses the hook result, handles it, and sends some
2219     nicely-formatted feedback back to the user.
2220
2221     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2222         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2223     @param hooks_results: the results of the multi-node hooks rpc call
2224     @param feedback_fn: function used send feedback back to the caller
2225     @param lu_result: previous Exec result
2226     @return: the new Exec result, based on the previous result
2227         and hook results
2228
2229     """
2230     # We only really run POST phase hooks, and are only interested in
2231     # their results
2232     if phase == constants.HOOKS_PHASE_POST:
2233       # Used to change hooks' output to proper indentation
2234       indent_re = re.compile('^', re.M)
2235       feedback_fn("* Hooks Results")
2236       assert hooks_results, "invalid result from hooks"
2237
2238       for node_name in hooks_results:
2239         res = hooks_results[node_name]
2240         msg = res.fail_msg
2241         test = msg and not res.offline
2242         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2243                       "Communication failure in hooks execution: %s", msg)
2244         if res.offline or msg:
2245           # No need to investigate payload if node is offline or gave an error.
2246           # override manually lu_result here as _ErrorIf only
2247           # overrides self.bad
2248           lu_result = 1
2249           continue
2250         for script, hkr, output in res.payload:
2251           test = hkr == constants.HKR_FAIL
2252           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2253                         "Script %s failed, output:", script)
2254           if test:
2255             output = indent_re.sub('      ', output)
2256             feedback_fn("%s" % output)
2257             lu_result = 0
2258
2259       return lu_result
2260
2261
2262 class LUVerifyDisks(NoHooksLU):
2263   """Verifies the cluster disks status.
2264
2265   """
2266   REQ_BGL = False
2267
2268   def ExpandNames(self):
2269     self.needed_locks = {
2270       locking.LEVEL_NODE: locking.ALL_SET,
2271       locking.LEVEL_INSTANCE: locking.ALL_SET,
2272     }
2273     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2274
2275   def Exec(self, feedback_fn):
2276     """Verify integrity of cluster disks.
2277
2278     @rtype: tuple of three items
2279     @return: a tuple of (dict of node-to-node_error, list of instances
2280         which need activate-disks, dict of instance: (node, volume) for
2281         missing volumes
2282
2283     """
2284     result = res_nodes, res_instances, res_missing = {}, [], {}
2285
2286     vg_name = self.cfg.GetVGName()
2287     nodes = utils.NiceSort(self.cfg.GetNodeList())
2288     instances = [self.cfg.GetInstanceInfo(name)
2289                  for name in self.cfg.GetInstanceList()]
2290
2291     nv_dict = {}
2292     for inst in instances:
2293       inst_lvs = {}
2294       if (not inst.admin_up or
2295           inst.disk_template not in constants.DTS_NET_MIRROR):
2296         continue
2297       inst.MapLVsByNode(inst_lvs)
2298       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2299       for node, vol_list in inst_lvs.iteritems():
2300         for vol in vol_list:
2301           nv_dict[(node, vol)] = inst
2302
2303     if not nv_dict:
2304       return result
2305
2306     node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2307
2308     for node in nodes:
2309       # node_volume
2310       node_res = node_lvs[node]
2311       if node_res.offline:
2312         continue
2313       msg = node_res.fail_msg
2314       if msg:
2315         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2316         res_nodes[node] = msg
2317         continue
2318
2319       lvs = node_res.payload
2320       for lv_name, (_, _, lv_online) in lvs.items():
2321         inst = nv_dict.pop((node, lv_name), None)
2322         if (not lv_online and inst is not None
2323             and inst.name not in res_instances):
2324           res_instances.append(inst.name)
2325
2326     # any leftover items in nv_dict are missing LVs, let's arrange the
2327     # data better
2328     for key, inst in nv_dict.iteritems():
2329       if inst.name not in res_missing:
2330         res_missing[inst.name] = []
2331       res_missing[inst.name].append(key)
2332
2333     return result
2334
2335
2336 class LURepairDiskSizes(NoHooksLU):
2337   """Verifies the cluster disks sizes.
2338
2339   """
2340   _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2341   REQ_BGL = False
2342
2343   def ExpandNames(self):
2344     if self.op.instances:
2345       self.wanted_names = []
2346       for name in self.op.instances:
2347         full_name = _ExpandInstanceName(self.cfg, name)
2348         self.wanted_names.append(full_name)
2349       self.needed_locks = {
2350         locking.LEVEL_NODE: [],
2351         locking.LEVEL_INSTANCE: self.wanted_names,
2352         }
2353       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2354     else:
2355       self.wanted_names = None
2356       self.needed_locks = {
2357         locking.LEVEL_NODE: locking.ALL_SET,
2358         locking.LEVEL_INSTANCE: locking.ALL_SET,
2359         }
2360     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2361
2362   def DeclareLocks(self, level):
2363     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2364       self._LockInstancesNodes(primary_only=True)
2365
2366   def CheckPrereq(self):
2367     """Check prerequisites.
2368
2369     This only checks the optional instance list against the existing names.
2370
2371     """
2372     if self.wanted_names is None:
2373       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2374
2375     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2376                              in self.wanted_names]
2377
2378   def _EnsureChildSizes(self, disk):
2379     """Ensure children of the disk have the needed disk size.
2380
2381     This is valid mainly for DRBD8 and fixes an issue where the
2382     children have smaller disk size.
2383
2384     @param disk: an L{ganeti.objects.Disk} object
2385
2386     """
2387     if disk.dev_type == constants.LD_DRBD8:
2388       assert disk.children, "Empty children for DRBD8?"
2389       fchild = disk.children[0]
2390       mismatch = fchild.size < disk.size
2391       if mismatch:
2392         self.LogInfo("Child disk has size %d, parent %d, fixing",
2393                      fchild.size, disk.size)
2394         fchild.size = disk.size
2395
2396       # and we recurse on this child only, not on the metadev
2397       return self._EnsureChildSizes(fchild) or mismatch
2398     else:
2399       return False
2400
2401   def Exec(self, feedback_fn):
2402     """Verify the size of cluster disks.
2403
2404     """
2405     # TODO: check child disks too
2406     # TODO: check differences in size between primary/secondary nodes
2407     per_node_disks = {}
2408     for instance in self.wanted_instances:
2409       pnode = instance.primary_node
2410       if pnode not in per_node_disks:
2411         per_node_disks[pnode] = []
2412       for idx, disk in enumerate(instance.disks):
2413         per_node_disks[pnode].append((instance, idx, disk))
2414
2415     changed = []
2416     for node, dskl in per_node_disks.items():
2417       newl = [v[2].Copy() for v in dskl]
2418       for dsk in newl:
2419         self.cfg.SetDiskID(dsk, node)
2420       result = self.rpc.call_blockdev_getsizes(node, newl)
2421       if result.fail_msg:
2422         self.LogWarning("Failure in blockdev_getsizes call to node"
2423                         " %s, ignoring", node)
2424         continue
2425       if len(result.data) != len(dskl):
2426         self.LogWarning("Invalid result from node %s, ignoring node results",
2427                         node)
2428         continue
2429       for ((instance, idx, disk), size) in zip(dskl, result.data):
2430         if size is None:
2431           self.LogWarning("Disk %d of instance %s did not return size"
2432                           " information, ignoring", idx, instance.name)
2433           continue
2434         if not isinstance(size, (int, long)):
2435           self.LogWarning("Disk %d of instance %s did not return valid"
2436                           " size information, ignoring", idx, instance.name)
2437           continue
2438         size = size >> 20
2439         if size != disk.size:
2440           self.LogInfo("Disk %d of instance %s has mismatched size,"
2441                        " correcting: recorded %d, actual %d", idx,
2442                        instance.name, disk.size, size)
2443           disk.size = size
2444           self.cfg.Update(instance, feedback_fn)
2445           changed.append((instance.name, idx, size))
2446         if self._EnsureChildSizes(disk):
2447           self.cfg.Update(instance, feedback_fn)
2448           changed.append((instance.name, idx, disk.size))
2449     return changed
2450
2451
2452 class LURenameCluster(LogicalUnit):
2453   """Rename the cluster.
2454
2455   """
2456   HPATH = "cluster-rename"
2457   HTYPE = constants.HTYPE_CLUSTER
2458   _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2459
2460   def BuildHooksEnv(self):
2461     """Build hooks env.
2462
2463     """
2464     env = {
2465       "OP_TARGET": self.cfg.GetClusterName(),
2466       "NEW_NAME": self.op.name,
2467       }
2468     mn = self.cfg.GetMasterNode()
2469     all_nodes = self.cfg.GetNodeList()
2470     return env, [mn], all_nodes
2471
2472   def CheckPrereq(self):
2473     """Verify that the passed name is a valid one.
2474
2475     """
2476     hostname = utils.GetHostInfo(self.op.name)
2477
2478     new_name = hostname.name
2479     self.ip = new_ip = hostname.ip
2480     old_name = self.cfg.GetClusterName()
2481     old_ip = self.cfg.GetMasterIP()
2482     if new_name == old_name and new_ip == old_ip:
2483       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2484                                  " cluster has changed",
2485                                  errors.ECODE_INVAL)
2486     if new_ip != old_ip:
2487       if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2488         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2489                                    " reachable on the network. Aborting." %
2490                                    new_ip, errors.ECODE_NOTUNIQUE)
2491
2492     self.op.name = new_name
2493
2494   def Exec(self, feedback_fn):
2495     """Rename the cluster.
2496
2497     """
2498     clustername = self.op.name
2499     ip = self.ip
2500
2501     # shutdown the master IP
2502     master = self.cfg.GetMasterNode()
2503     result = self.rpc.call_node_stop_master(master, False)
2504     result.Raise("Could not disable the master role")
2505
2506     try:
2507       cluster = self.cfg.GetClusterInfo()
2508       cluster.cluster_name = clustername
2509       cluster.master_ip = ip
2510       self.cfg.Update(cluster, feedback_fn)
2511
2512       # update the known hosts file
2513       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2514       node_list = self.cfg.GetNodeList()
2515       try:
2516         node_list.remove(master)
2517       except ValueError:
2518         pass
2519       result = self.rpc.call_upload_file(node_list,
2520                                          constants.SSH_KNOWN_HOSTS_FILE)
2521       for to_node, to_result in result.iteritems():
2522         msg = to_result.fail_msg
2523         if msg:
2524           msg = ("Copy of file %s to node %s failed: %s" %
2525                  (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2526           self.proc.LogWarning(msg)
2527
2528     finally:
2529       result = self.rpc.call_node_start_master(master, False, False)
2530       msg = result.fail_msg
2531       if msg:
2532         self.LogWarning("Could not re-enable the master role on"
2533                         " the master, please restart manually: %s", msg)
2534
2535
2536 def _RecursiveCheckIfLVMBased(disk):
2537   """Check if the given disk or its children are lvm-based.
2538
2539   @type disk: L{objects.Disk}
2540   @param disk: the disk to check
2541   @rtype: boolean
2542   @return: boolean indicating whether a LD_LV dev_type was found or not
2543
2544   """
2545   if disk.children:
2546     for chdisk in disk.children:
2547       if _RecursiveCheckIfLVMBased(chdisk):
2548         return True
2549   return disk.dev_type == constants.LD_LV
2550
2551
2552 class LUSetClusterParams(LogicalUnit):
2553   """Change the parameters of the cluster.
2554
2555   """
2556   HPATH = "cluster-modify"
2557   HTYPE = constants.HTYPE_CLUSTER
2558   _OP_PARAMS = [
2559     ("vg_name", None, _TMaybeString),
2560     ("enabled_hypervisors", None,
2561      _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2562     ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2563     ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2564     ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2565     ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2566     ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2567     ("uid_pool", None, _NoType),
2568     ("add_uids", None, _NoType),
2569     ("remove_uids", None, _NoType),
2570     ("maintain_node_health", None, _TMaybeBool),
2571     ("nicparams", None, _TOr(_TDict, _TNone)),
2572     ]
2573   REQ_BGL = False
2574
2575   def CheckArguments(self):
2576     """Check parameters
2577
2578     """
2579     if self.op.uid_pool:
2580       uidpool.CheckUidPool(self.op.uid_pool)
2581
2582     if self.op.add_uids:
2583       uidpool.CheckUidPool(self.op.add_uids)
2584
2585     if self.op.remove_uids:
2586       uidpool.CheckUidPool(self.op.remove_uids)
2587
2588   def ExpandNames(self):
2589     # FIXME: in the future maybe other cluster params won't require checking on
2590     # all nodes to be modified.
2591     self.needed_locks = {
2592       locking.LEVEL_NODE: locking.ALL_SET,
2593     }
2594     self.share_locks[locking.LEVEL_NODE] = 1
2595
2596   def BuildHooksEnv(self):
2597     """Build hooks env.
2598
2599     """
2600     env = {
2601       "OP_TARGET": self.cfg.GetClusterName(),
2602       "NEW_VG_NAME": self.op.vg_name,
2603       }
2604     mn = self.cfg.GetMasterNode()
2605     return env, [mn], [mn]
2606
2607   def CheckPrereq(self):
2608     """Check prerequisites.
2609
2610     This checks whether the given params don't conflict and
2611     if the given volume group is valid.
2612
2613     """
2614     if self.op.vg_name is not None and not self.op.vg_name:
2615       instances = self.cfg.GetAllInstancesInfo().values()
2616       for inst in instances:
2617         for disk in inst.disks:
2618           if _RecursiveCheckIfLVMBased(disk):
2619             raise errors.OpPrereqError("Cannot disable lvm storage while"
2620                                        " lvm-based instances exist",
2621                                        errors.ECODE_INVAL)
2622
2623     node_list = self.acquired_locks[locking.LEVEL_NODE]
2624
2625     # if vg_name not None, checks given volume group on all nodes
2626     if self.op.vg_name:
2627       vglist = self.rpc.call_vg_list(node_list)
2628       for node in node_list:
2629         msg = vglist[node].fail_msg
2630         if msg:
2631           # ignoring down node
2632           self.LogWarning("Error while gathering data on node %s"
2633                           " (ignoring node): %s", node, msg)
2634           continue
2635         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2636                                               self.op.vg_name,
2637                                               constants.MIN_VG_SIZE)
2638         if vgstatus:
2639           raise errors.OpPrereqError("Error on node '%s': %s" %
2640                                      (node, vgstatus), errors.ECODE_ENVIRON)
2641
2642     self.cluster = cluster = self.cfg.GetClusterInfo()
2643     # validate params changes
2644     if self.op.beparams:
2645       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2646       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2647
2648     if self.op.nicparams:
2649       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2650       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2651       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2652       nic_errors = []
2653
2654       # check all instances for consistency
2655       for instance in self.cfg.GetAllInstancesInfo().values():
2656         for nic_idx, nic in enumerate(instance.nics):
2657           params_copy = copy.deepcopy(nic.nicparams)
2658           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2659
2660           # check parameter syntax
2661           try:
2662             objects.NIC.CheckParameterSyntax(params_filled)
2663           except errors.ConfigurationError, err:
2664             nic_errors.append("Instance %s, nic/%d: %s" %
2665                               (instance.name, nic_idx, err))
2666
2667           # if we're moving instances to routed, check that they have an ip
2668           target_mode = params_filled[constants.NIC_MODE]
2669           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2670             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2671                               (instance.name, nic_idx))
2672       if nic_errors:
2673         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2674                                    "\n".join(nic_errors))
2675
2676     # hypervisor list/parameters
2677     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2678     if self.op.hvparams:
2679       for hv_name, hv_dict in self.op.hvparams.items():
2680         if hv_name not in self.new_hvparams:
2681           self.new_hvparams[hv_name] = hv_dict
2682         else:
2683           self.new_hvparams[hv_name].update(hv_dict)
2684
2685     # os hypervisor parameters
2686     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2687     if self.op.os_hvp:
2688       for os_name, hvs in self.op.os_hvp.items():
2689         if os_name not in self.new_os_hvp:
2690           self.new_os_hvp[os_name] = hvs
2691         else:
2692           for hv_name, hv_dict in hvs.items():
2693             if hv_name not in self.new_os_hvp[os_name]:
2694               self.new_os_hvp[os_name][hv_name] = hv_dict
2695             else:
2696               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2697
2698     # os parameters
2699     self.new_osp = objects.FillDict(cluster.osparams, {})
2700     if self.op.osparams:
2701       for os_name, osp in self.op.osparams.items():
2702         if os_name not in self.new_osp:
2703           self.new_osp[os_name] = {}
2704
2705         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2706                                                   use_none=True)
2707
2708         if not self.new_osp[os_name]:
2709           # we removed all parameters
2710           del self.new_osp[os_name]
2711         else:
2712           # check the parameter validity (remote check)
2713           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2714                          os_name, self.new_osp[os_name])
2715
2716     # changes to the hypervisor list
2717     if self.op.enabled_hypervisors is not None:
2718       self.hv_list = self.op.enabled_hypervisors
2719       for hv in self.hv_list:
2720         # if the hypervisor doesn't already exist in the cluster
2721         # hvparams, we initialize it to empty, and then (in both
2722         # cases) we make sure to fill the defaults, as we might not
2723         # have a complete defaults list if the hypervisor wasn't
2724         # enabled before
2725         if hv not in new_hvp:
2726           new_hvp[hv] = {}
2727         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2728         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2729     else:
2730       self.hv_list = cluster.enabled_hypervisors
2731
2732     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2733       # either the enabled list has changed, or the parameters have, validate
2734       for hv_name, hv_params in self.new_hvparams.items():
2735         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2736             (self.op.enabled_hypervisors and
2737              hv_name in self.op.enabled_hypervisors)):
2738           # either this is a new hypervisor, or its parameters have changed
2739           hv_class = hypervisor.GetHypervisor(hv_name)
2740           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2741           hv_class.CheckParameterSyntax(hv_params)
2742           _CheckHVParams(self, node_list, hv_name, hv_params)
2743
2744     if self.op.os_hvp:
2745       # no need to check any newly-enabled hypervisors, since the
2746       # defaults have already been checked in the above code-block
2747       for os_name, os_hvp in self.new_os_hvp.items():
2748         for hv_name, hv_params in os_hvp.items():
2749           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2750           # we need to fill in the new os_hvp on top of the actual hv_p
2751           cluster_defaults = self.new_hvparams.get(hv_name, {})
2752           new_osp = objects.FillDict(cluster_defaults, hv_params)
2753           hv_class = hypervisor.GetHypervisor(hv_name)
2754           hv_class.CheckParameterSyntax(new_osp)
2755           _CheckHVParams(self, node_list, hv_name, new_osp)
2756
2757
2758   def Exec(self, feedback_fn):
2759     """Change the parameters of the cluster.
2760
2761     """
2762     if self.op.vg_name is not None:
2763       new_volume = self.op.vg_name
2764       if not new_volume:
2765         new_volume = None
2766       if new_volume != self.cfg.GetVGName():
2767         self.cfg.SetVGName(new_volume)
2768       else:
2769         feedback_fn("Cluster LVM configuration already in desired"
2770                     " state, not changing")
2771     if self.op.hvparams:
2772       self.cluster.hvparams = self.new_hvparams
2773     if self.op.os_hvp:
2774       self.cluster.os_hvp = self.new_os_hvp
2775     if self.op.enabled_hypervisors is not None:
2776       self.cluster.hvparams = self.new_hvparams
2777       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2778     if self.op.beparams:
2779       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2780     if self.op.nicparams:
2781       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2782     if self.op.osparams:
2783       self.cluster.osparams = self.new_osp
2784
2785     if self.op.candidate_pool_size is not None:
2786       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2787       # we need to update the pool size here, otherwise the save will fail
2788       _AdjustCandidatePool(self, [])
2789
2790     if self.op.maintain_node_health is not None:
2791       self.cluster.maintain_node_health = self.op.maintain_node_health
2792
2793     if self.op.add_uids is not None:
2794       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2795
2796     if self.op.remove_uids is not None:
2797       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2798
2799     if self.op.uid_pool is not None:
2800       self.cluster.uid_pool = self.op.uid_pool
2801
2802     self.cfg.Update(self.cluster, feedback_fn)
2803
2804
2805 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2806   """Distribute additional files which are part of the cluster configuration.
2807
2808   ConfigWriter takes care of distributing the config and ssconf files, but
2809   there are more files which should be distributed to all nodes. This function
2810   makes sure those are copied.
2811
2812   @param lu: calling logical unit
2813   @param additional_nodes: list of nodes not in the config to distribute to
2814
2815   """
2816   # 1. Gather target nodes
2817   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2818   dist_nodes = lu.cfg.GetOnlineNodeList()
2819   if additional_nodes is not None:
2820     dist_nodes.extend(additional_nodes)
2821   if myself.name in dist_nodes:
2822     dist_nodes.remove(myself.name)
2823
2824   # 2. Gather files to distribute
2825   dist_files = set([constants.ETC_HOSTS,
2826                     constants.SSH_KNOWN_HOSTS_FILE,
2827                     constants.RAPI_CERT_FILE,
2828                     constants.RAPI_USERS_FILE,
2829                     constants.CONFD_HMAC_KEY,
2830                     constants.CLUSTER_DOMAIN_SECRET_FILE,
2831                    ])
2832
2833   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2834   for hv_name in enabled_hypervisors:
2835     hv_class = hypervisor.GetHypervisor(hv_name)
2836     dist_files.update(hv_class.GetAncillaryFiles())
2837
2838   # 3. Perform the files upload
2839   for fname in dist_files:
2840     if os.path.exists(fname):
2841       result = lu.rpc.call_upload_file(dist_nodes, fname)
2842       for to_node, to_result in result.items():
2843         msg = to_result.fail_msg
2844         if msg:
2845           msg = ("Copy of file %s to node %s failed: %s" %
2846                  (fname, to_node, msg))
2847           lu.proc.LogWarning(msg)
2848
2849
2850 class LURedistributeConfig(NoHooksLU):
2851   """Force the redistribution of cluster configuration.
2852
2853   This is a very simple LU.
2854
2855   """
2856   REQ_BGL = False
2857
2858   def ExpandNames(self):
2859     self.needed_locks = {
2860       locking.LEVEL_NODE: locking.ALL_SET,
2861     }
2862     self.share_locks[locking.LEVEL_NODE] = 1
2863
2864   def Exec(self, feedback_fn):
2865     """Redistribute the configuration.
2866
2867     """
2868     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2869     _RedistributeAncillaryFiles(self)
2870
2871
2872 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2873   """Sleep and poll for an instance's disk to sync.
2874
2875   """
2876   if not instance.disks or disks is not None and not disks:
2877     return True
2878
2879   disks = _ExpandCheckDisks(instance, disks)
2880
2881   if not oneshot:
2882     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2883
2884   node = instance.primary_node
2885
2886   for dev in disks:
2887     lu.cfg.SetDiskID(dev, node)
2888
2889   # TODO: Convert to utils.Retry
2890
2891   retries = 0
2892   degr_retries = 10 # in seconds, as we sleep 1 second each time
2893   while True:
2894     max_time = 0
2895     done = True
2896     cumul_degraded = False
2897     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2898     msg = rstats.fail_msg
2899     if msg:
2900       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2901       retries += 1
2902       if retries >= 10:
2903         raise errors.RemoteError("Can't contact node %s for mirror data,"
2904                                  " aborting." % node)
2905       time.sleep(6)
2906       continue
2907     rstats = rstats.payload
2908     retries = 0
2909     for i, mstat in enumerate(rstats):
2910       if mstat is None:
2911         lu.LogWarning("Can't compute data for node %s/%s",
2912                            node, disks[i].iv_name)
2913         continue
2914
2915       cumul_degraded = (cumul_degraded or
2916                         (mstat.is_degraded and mstat.sync_percent is None))
2917       if mstat.sync_percent is not None:
2918         done = False
2919         if mstat.estimated_time is not None:
2920           rem_time = ("%s remaining (estimated)" %
2921                       utils.FormatSeconds(mstat.estimated_time))
2922           max_time = mstat.estimated_time
2923         else:
2924           rem_time = "no time estimate"
2925         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2926                         (disks[i].iv_name, mstat.sync_percent, rem_time))
2927
2928     # if we're done but degraded, let's do a few small retries, to
2929     # make sure we see a stable and not transient situation; therefore
2930     # we force restart of the loop
2931     if (done or oneshot) and cumul_degraded and degr_retries > 0:
2932       logging.info("Degraded disks found, %d retries left", degr_retries)
2933       degr_retries -= 1
2934       time.sleep(1)
2935       continue
2936
2937     if done or oneshot:
2938       break
2939
2940     time.sleep(min(60, max_time))
2941
2942   if done:
2943     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2944   return not cumul_degraded
2945
2946
2947 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2948   """Check that mirrors are not degraded.
2949
2950   The ldisk parameter, if True, will change the test from the
2951   is_degraded attribute (which represents overall non-ok status for
2952   the device(s)) to the ldisk (representing the local storage status).
2953
2954   """
2955   lu.cfg.SetDiskID(dev, node)
2956
2957   result = True
2958
2959   if on_primary or dev.AssembleOnSecondary():
2960     rstats = lu.rpc.call_blockdev_find(node, dev)
2961     msg = rstats.fail_msg
2962     if msg:
2963       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2964       result = False
2965     elif not rstats.payload:
2966       lu.LogWarning("Can't find disk on node %s", node)
2967       result = False
2968     else:
2969       if ldisk:
2970         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2971       else:
2972         result = result and not rstats.payload.is_degraded
2973
2974   if dev.children:
2975     for child in dev.children:
2976       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2977
2978   return result
2979
2980
2981 class LUDiagnoseOS(NoHooksLU):
2982   """Logical unit for OS diagnose/query.
2983
2984   """
2985   _OP_PARAMS = [
2986     _POutputFields,
2987     ("names", _EmptyList, _TListOf(_TNonEmptyString)),
2988     ]
2989   REQ_BGL = False
2990   _FIELDS_STATIC = utils.FieldSet()
2991   _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
2992                                    "parameters", "api_versions")
2993
2994   def CheckArguments(self):
2995     if self.op.names:
2996       raise errors.OpPrereqError("Selective OS query not supported",
2997                                  errors.ECODE_INVAL)
2998
2999     _CheckOutputFields(static=self._FIELDS_STATIC,
3000                        dynamic=self._FIELDS_DYNAMIC,
3001                        selected=self.op.output_fields)
3002
3003   def ExpandNames(self):
3004     # Lock all nodes, in shared mode
3005     # Temporary removal of locks, should be reverted later
3006     # TODO: reintroduce locks when they are lighter-weight
3007     self.needed_locks = {}
3008     #self.share_locks[locking.LEVEL_NODE] = 1
3009     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3010
3011   @staticmethod
3012   def _DiagnoseByOS(rlist):
3013     """Remaps a per-node return list into an a per-os per-node dictionary
3014
3015     @param rlist: a map with node names as keys and OS objects as values
3016
3017     @rtype: dict
3018     @return: a dictionary with osnames as keys and as value another
3019         map, with nodes as keys and tuples of (path, status, diagnose,
3020         variants, parameters, api_versions) as values, eg::
3021
3022           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3023                                      (/srv/..., False, "invalid api")],
3024                            "node2": [(/srv/..., True, "", [], [])]}
3025           }
3026
3027     """
3028     all_os = {}
3029     # we build here the list of nodes that didn't fail the RPC (at RPC
3030     # level), so that nodes with a non-responding node daemon don't
3031     # make all OSes invalid
3032     good_nodes = [node_name for node_name in rlist
3033                   if not rlist[node_name].fail_msg]
3034     for node_name, nr in rlist.items():
3035       if nr.fail_msg or not nr.payload:
3036         continue
3037       for (name, path, status, diagnose, variants,
3038            params, api_versions) in nr.payload:
3039         if name not in all_os:
3040           # build a list of nodes for this os containing empty lists
3041           # for each node in node_list
3042           all_os[name] = {}
3043           for nname in good_nodes:
3044             all_os[name][nname] = []
3045         # convert params from [name, help] to (name, help)
3046         params = [tuple(v) for v in params]
3047         all_os[name][node_name].append((path, status, diagnose,
3048                                         variants, params, api_versions))
3049     return all_os
3050
3051   def Exec(self, feedback_fn):
3052     """Compute the list of OSes.
3053
3054     """
3055     valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3056     node_data = self.rpc.call_os_diagnose(valid_nodes)
3057     pol = self._DiagnoseByOS(node_data)
3058     output = []
3059
3060     for os_name, os_data in pol.items():
3061       row = []
3062       valid = True
3063       (variants, params, api_versions) = null_state = (set(), set(), set())
3064       for idx, osl in enumerate(os_data.values()):
3065         valid = bool(valid and osl and osl[0][1])
3066         if not valid:
3067           (variants, params, api_versions) = null_state
3068           break
3069         node_variants, node_params, node_api = osl[0][3:6]
3070         if idx == 0: # first entry
3071           variants = set(node_variants)
3072           params = set(node_params)
3073           api_versions = set(node_api)
3074         else: # keep consistency
3075           variants.intersection_update(node_variants)
3076           params.intersection_update(node_params)
3077           api_versions.intersection_update(node_api)
3078
3079       for field in self.op.output_fields:
3080         if field == "name":
3081           val = os_name
3082         elif field == "valid":
3083           val = valid
3084         elif field == "node_status":
3085           # this is just a copy of the dict
3086           val = {}
3087           for node_name, nos_list in os_data.items():
3088             val[node_name] = nos_list
3089         elif field == "variants":
3090           val = list(variants)
3091         elif field == "parameters":
3092           val = list(params)
3093         elif field == "api_versions":
3094           val = list(api_versions)
3095         else:
3096           raise errors.ParameterError(field)
3097         row.append(val)
3098       output.append(row)
3099
3100     return output
3101
3102
3103 class LURemoveNode(LogicalUnit):
3104   """Logical unit for removing a node.
3105
3106   """
3107   HPATH = "node-remove"
3108   HTYPE = constants.HTYPE_NODE
3109   _OP_PARAMS = [
3110     _PNodeName,
3111     ]
3112
3113   def BuildHooksEnv(self):
3114     """Build hooks env.
3115
3116     This doesn't run on the target node in the pre phase as a failed
3117     node would then be impossible to remove.
3118
3119     """
3120     env = {
3121       "OP_TARGET": self.op.node_name,
3122       "NODE_NAME": self.op.node_name,
3123       }
3124     all_nodes = self.cfg.GetNodeList()
3125     try:
3126       all_nodes.remove(self.op.node_name)
3127     except ValueError:
3128       logging.warning("Node %s which is about to be removed not found"
3129                       " in the all nodes list", self.op.node_name)
3130     return env, all_nodes, all_nodes
3131
3132   def CheckPrereq(self):
3133     """Check prerequisites.
3134
3135     This checks:
3136      - the node exists in the configuration
3137      - it does not have primary or secondary instances
3138      - it's not the master
3139
3140     Any errors are signaled by raising errors.OpPrereqError.
3141
3142     """
3143     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3144     node = self.cfg.GetNodeInfo(self.op.node_name)
3145     assert node is not None
3146
3147     instance_list = self.cfg.GetInstanceList()
3148
3149     masternode = self.cfg.GetMasterNode()
3150     if node.name == masternode:
3151       raise errors.OpPrereqError("Node is the master node,"
3152                                  " you need to failover first.",
3153                                  errors.ECODE_INVAL)
3154
3155     for instance_name in instance_list:
3156       instance = self.cfg.GetInstanceInfo(instance_name)
3157       if node.name in instance.all_nodes:
3158         raise errors.OpPrereqError("Instance %s is still running on the node,"
3159                                    " please remove first." % instance_name,
3160                                    errors.ECODE_INVAL)
3161     self.op.node_name = node.name
3162     self.node = node
3163
3164   def Exec(self, feedback_fn):
3165     """Removes the node from the cluster.
3166
3167     """
3168     node = self.node
3169     logging.info("Stopping the node daemon and removing configs from node %s",
3170                  node.name)
3171
3172     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3173
3174     # Promote nodes to master candidate as needed
3175     _AdjustCandidatePool(self, exceptions=[node.name])
3176     self.context.RemoveNode(node.name)
3177
3178     # Run post hooks on the node before it's removed
3179     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3180     try:
3181       hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3182     except:
3183       # pylint: disable-msg=W0702
3184       self.LogWarning("Errors occurred running hooks on %s" % node.name)
3185
3186     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3187     msg = result.fail_msg
3188     if msg:
3189       self.LogWarning("Errors encountered on the remote node while leaving"
3190                       " the cluster: %s", msg)
3191
3192     # Remove node from our /etc/hosts
3193     if self.cfg.GetClusterInfo().modify_etc_hosts:
3194       # FIXME: this should be done via an rpc call to node daemon
3195       utils.RemoveHostFromEtcHosts(node.name)
3196       _RedistributeAncillaryFiles(self)
3197
3198
3199 class LUQueryNodes(NoHooksLU):
3200   """Logical unit for querying nodes.
3201
3202   """
3203   # pylint: disable-msg=W0142
3204   _OP_PARAMS = [
3205     _POutputFields,
3206     ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3207     ("use_locking", False, _TBool),
3208     ]
3209   REQ_BGL = False
3210
3211   _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3212                     "master_candidate", "offline", "drained"]
3213
3214   _FIELDS_DYNAMIC = utils.FieldSet(
3215     "dtotal", "dfree",
3216     "mtotal", "mnode", "mfree",
3217     "bootid",
3218     "ctotal", "cnodes", "csockets",
3219     )
3220
3221   _FIELDS_STATIC = utils.FieldSet(*[
3222     "pinst_cnt", "sinst_cnt",
3223     "pinst_list", "sinst_list",
3224     "pip", "sip", "tags",
3225     "master",
3226     "role"] + _SIMPLE_FIELDS
3227     )
3228
3229   def CheckArguments(self):
3230     _CheckOutputFields(static=self._FIELDS_STATIC,
3231                        dynamic=self._FIELDS_DYNAMIC,
3232                        selected=self.op.output_fields)
3233
3234   def ExpandNames(self):
3235     self.needed_locks = {}
3236     self.share_locks[locking.LEVEL_NODE] = 1
3237
3238     if self.op.names:
3239       self.wanted = _GetWantedNodes(self, self.op.names)
3240     else:
3241       self.wanted = locking.ALL_SET
3242
3243     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3244     self.do_locking = self.do_node_query and self.op.use_locking
3245     if self.do_locking:
3246       # if we don't request only static fields, we need to lock the nodes
3247       self.needed_locks[locking.LEVEL_NODE] = self.wanted
3248
3249   def Exec(self, feedback_fn):
3250     """Computes the list of nodes and their attributes.
3251
3252     """
3253     all_info = self.cfg.GetAllNodesInfo()
3254     if self.do_locking:
3255       nodenames = self.acquired_locks[locking.LEVEL_NODE]
3256     elif self.wanted != locking.ALL_SET:
3257       nodenames = self.wanted
3258       missing = set(nodenames).difference(all_info.keys())
3259       if missing:
3260         raise errors.OpExecError(
3261           "Some nodes were removed before retrieving their data: %s" % missing)
3262     else:
3263       nodenames = all_info.keys()
3264
3265     nodenames = utils.NiceSort(nodenames)
3266     nodelist = [all_info[name] for name in nodenames]
3267
3268     # begin data gathering
3269
3270     if self.do_node_query:
3271       live_data = {}
3272       node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3273                                           self.cfg.GetHypervisorType())
3274       for name in nodenames:
3275         nodeinfo = node_data[name]
3276         if not nodeinfo.fail_msg and nodeinfo.payload:
3277           nodeinfo = nodeinfo.payload
3278           fn = utils.TryConvert
3279           live_data[name] = {
3280             "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3281             "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3282             "mfree": fn(int, nodeinfo.get('memory_free', None)),
3283             "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3284             "dfree": fn(int, nodeinfo.get('vg_free', None)),
3285             "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3286             "bootid": nodeinfo.get('bootid', None),
3287             "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3288             "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3289             }
3290         else:
3291           live_data[name] = {}
3292     else:
3293       live_data = dict.fromkeys(nodenames, {})
3294
3295     node_to_primary = dict([(name, set()) for name in nodenames])
3296     node_to_secondary = dict([(name, set()) for name in nodenames])
3297
3298     inst_fields = frozenset(("pinst_cnt", "pinst_list",
3299                              "sinst_cnt", "sinst_list"))
3300     if inst_fields & frozenset(self.op.output_fields):
3301       inst_data = self.cfg.GetAllInstancesInfo()
3302
3303       for inst in inst_data.values():
3304         if inst.primary_node in node_to_primary:
3305           node_to_primary[inst.primary_node].add(inst.name)
3306         for secnode in inst.secondary_nodes:
3307           if secnode in node_to_secondary:
3308             node_to_secondary[secnode].add(inst.name)
3309
3310     master_node = self.cfg.GetMasterNode()
3311
3312     # end data gathering
3313
3314     output = []
3315     for node in nodelist:
3316       node_output = []
3317       for field in self.op.output_fields:
3318         if field in self._SIMPLE_FIELDS:
3319           val = getattr(node, field)
3320         elif field == "pinst_list":
3321           val = list(node_to_primary[node.name])
3322         elif field == "sinst_list":
3323           val = list(node_to_secondary[node.name])
3324         elif field == "pinst_cnt":
3325           val = len(node_to_primary[node.name])
3326         elif field == "sinst_cnt":
3327           val = len(node_to_secondary[node.name])
3328         elif field == "pip":
3329           val = node.primary_ip
3330         elif field == "sip":
3331           val = node.secondary_ip
3332         elif field == "tags":
3333           val = list(node.GetTags())
3334         elif field == "master":
3335           val = node.name == master_node
3336         elif self._FIELDS_DYNAMIC.Matches(field):
3337           val = live_data[node.name].get(field, None)
3338         elif field == "role":
3339           if node.name == master_node:
3340             val = "M"
3341           elif node.master_candidate:
3342             val = "C"
3343           elif node.drained:
3344             val = "D"
3345           elif node.offline:
3346             val = "O"
3347           else:
3348             val = "R"
3349         else:
3350           raise errors.ParameterError(field)
3351         node_output.append(val)
3352       output.append(node_output)
3353
3354     return output
3355
3356
3357 class LUQueryNodeVolumes(NoHooksLU):
3358   """Logical unit for getting volumes on node(s).
3359
3360   """
3361   _OP_PARAMS = [
3362     ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3363     ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3364     ]
3365   REQ_BGL = False
3366   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3367   _FIELDS_STATIC = utils.FieldSet("node")
3368
3369   def CheckArguments(self):
3370     _CheckOutputFields(static=self._FIELDS_STATIC,
3371                        dynamic=self._FIELDS_DYNAMIC,
3372                        selected=self.op.output_fields)
3373
3374   def ExpandNames(self):
3375     self.needed_locks = {}
3376     self.share_locks[locking.LEVEL_NODE] = 1
3377     if not self.op.nodes:
3378       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3379     else:
3380       self.needed_locks[locking.LEVEL_NODE] = \
3381         _GetWantedNodes(self, self.op.nodes)
3382
3383   def Exec(self, feedback_fn):
3384     """Computes the list of nodes and their attributes.
3385
3386     """
3387     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3388     volumes = self.rpc.call_node_volumes(nodenames)
3389
3390     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3391              in self.cfg.GetInstanceList()]
3392
3393     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3394
3395     output = []
3396     for node in nodenames:
3397       nresult = volumes[node]
3398       if nresult.offline:
3399         continue
3400       msg = nresult.fail_msg
3401       if msg:
3402         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3403         continue
3404
3405       node_vols = nresult.payload[:]
3406       node_vols.sort(key=lambda vol: vol['dev'])
3407
3408       for vol in node_vols:
3409         node_output = []
3410         for field in self.op.output_fields:
3411           if field == "node":
3412             val = node
3413           elif field == "phys":
3414             val = vol['dev']
3415           elif field == "vg":
3416             val = vol['vg']
3417           elif field == "name":
3418             val = vol['name']
3419           elif field == "size":
3420             val = int(float(vol['size']))
3421           elif field == "instance":
3422             for inst in ilist:
3423               if node not in lv_by_node[inst]:
3424                 continue
3425               if vol['name'] in lv_by_node[inst][node]:
3426                 val = inst.name
3427                 break
3428             else:
3429               val = '-'
3430           else:
3431             raise errors.ParameterError(field)
3432           node_output.append(str(val))
3433
3434         output.append(node_output)
3435
3436     return output
3437
3438
3439 class LUQueryNodeStorage(NoHooksLU):
3440   """Logical unit for getting information on storage units on node(s).
3441
3442   """
3443   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3444   _OP_PARAMS = [
3445     ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3446     ("storage_type", _NoDefault, _CheckStorageType),
3447     ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3448     ("name", None, _TMaybeString),
3449     ]
3450   REQ_BGL = False
3451
3452   def CheckArguments(self):
3453     _CheckOutputFields(static=self._FIELDS_STATIC,
3454                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3455                        selected=self.op.output_fields)
3456
3457   def ExpandNames(self):
3458     self.needed_locks = {}
3459     self.share_locks[locking.LEVEL_NODE] = 1
3460
3461     if self.op.nodes:
3462       self.needed_locks[locking.LEVEL_NODE] = \
3463         _GetWantedNodes(self, self.op.nodes)
3464     else:
3465       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3466
3467   def Exec(self, feedback_fn):
3468     """Computes the list of nodes and their attributes.
3469
3470     """
3471     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3472
3473     # Always get name to sort by
3474     if constants.SF_NAME in self.op.output_fields:
3475       fields = self.op.output_fields[:]
3476     else:
3477       fields = [constants.SF_NAME] + self.op.output_fields
3478
3479     # Never ask for node or type as it's only known to the LU
3480     for extra in [constants.SF_NODE, constants.SF_TYPE]:
3481       while extra in fields:
3482         fields.remove(extra)
3483
3484     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3485     name_idx = field_idx[constants.SF_NAME]
3486
3487     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3488     data = self.rpc.call_storage_list(self.nodes,
3489                                       self.op.storage_type, st_args,
3490                                       self.op.name, fields)
3491
3492     result = []
3493
3494     for node in utils.NiceSort(self.nodes):
3495       nresult = data[node]
3496       if nresult.offline:
3497         continue
3498
3499       msg = nresult.fail_msg
3500       if msg:
3501         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3502         continue
3503
3504       rows = dict([(row[name_idx], row) for row in nresult.payload])
3505
3506       for name in utils.NiceSort(rows.keys()):
3507         row = rows[name]
3508
3509         out = []
3510
3511         for field in self.op.output_fields:
3512           if field == constants.SF_NODE:
3513             val = node
3514           elif field == constants.SF_TYPE:
3515             val = self.op.storage_type
3516           elif field in field_idx:
3517             val = row[field_idx[field]]
3518           else:
3519             raise errors.ParameterError(field)
3520
3521           out.append(val)
3522
3523         result.append(out)
3524
3525     return result
3526
3527
3528 class LUModifyNodeStorage(NoHooksLU):
3529   """Logical unit for modifying a storage volume on a node.
3530
3531   """
3532   _OP_PARAMS = [
3533     _PNodeName,
3534     ("storage_type", _NoDefault, _CheckStorageType),
3535     ("name", _NoDefault, _TNonEmptyString),
3536     ("changes", _NoDefault, _TDict),
3537     ]
3538   REQ_BGL = False
3539
3540   def CheckArguments(self):
3541     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3542
3543     storage_type = self.op.storage_type
3544
3545     try:
3546       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3547     except KeyError:
3548       raise errors.OpPrereqError("Storage units of type '%s' can not be"
3549                                  " modified" % storage_type,
3550                                  errors.ECODE_INVAL)
3551
3552     diff = set(self.op.changes.keys()) - modifiable
3553     if diff:
3554       raise errors.OpPrereqError("The following fields can not be modified for"
3555                                  " storage units of type '%s': %r" %
3556                                  (storage_type, list(diff)),
3557                                  errors.ECODE_INVAL)
3558
3559   def ExpandNames(self):
3560     self.needed_locks = {
3561       locking.LEVEL_NODE: self.op.node_name,
3562       }
3563
3564   def Exec(self, feedback_fn):
3565     """Computes the list of nodes and their attributes.
3566
3567     """
3568     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3569     result = self.rpc.call_storage_modify(self.op.node_name,
3570                                           self.op.storage_type, st_args,
3571                                           self.op.name, self.op.changes)
3572     result.Raise("Failed to modify storage unit '%s' on %s" %
3573                  (self.op.name, self.op.node_name))
3574
3575
3576 class LUAddNode(LogicalUnit):
3577   """Logical unit for adding node to the cluster.
3578
3579   """
3580   HPATH = "node-add"
3581   HTYPE = constants.HTYPE_NODE
3582   _OP_PARAMS = [
3583     _PNodeName,
3584     ("primary_ip", None, _NoType),
3585     ("secondary_ip", None, _TMaybeString),
3586     ("readd", False, _TBool),
3587     ]
3588
3589   def CheckArguments(self):
3590     # validate/normalize the node name
3591     self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3592
3593   def BuildHooksEnv(self):
3594     """Build hooks env.
3595
3596     This will run on all nodes before, and on all nodes + the new node after.
3597
3598     """
3599     env = {
3600       "OP_TARGET": self.op.node_name,
3601       "NODE_NAME": self.op.node_name,
3602       "NODE_PIP": self.op.primary_ip,
3603       "NODE_SIP": self.op.secondary_ip,
3604       }
3605     nodes_0 = self.cfg.GetNodeList()
3606     nodes_1 = nodes_0 + [self.op.node_name, ]
3607     return env, nodes_0, nodes_1
3608
3609   def CheckPrereq(self):
3610     """Check prerequisites.
3611
3612     This checks:
3613      - the new node is not already in the config
3614      - it is resolvable
3615      - its parameters (single/dual homed) matches the cluster
3616
3617     Any errors are signaled by raising errors.OpPrereqError.
3618
3619     """
3620     node_name = self.op.node_name
3621     cfg = self.cfg
3622
3623     dns_data = utils.GetHostInfo(node_name)
3624
3625     node = dns_data.name
3626     primary_ip = self.op.primary_ip = dns_data.ip
3627     if self.op.secondary_ip is None:
3628       self.op.secondary_ip = primary_ip
3629     if not utils.IsValidIP4(self.op.secondary_ip):
3630       raise errors.OpPrereqError("Invalid secondary IP given",
3631                                  errors.ECODE_INVAL)
3632     secondary_ip = self.op.secondary_ip
3633
3634     node_list = cfg.GetNodeList()
3635     if not self.op.readd and node in node_list:
3636       raise errors.OpPrereqError("Node %s is already in the configuration" %
3637                                  node, errors.ECODE_EXISTS)
3638     elif self.op.readd and node not in node_list:
3639       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3640                                  errors.ECODE_NOENT)
3641
3642     self.changed_primary_ip = False
3643
3644     for existing_node_name in node_list:
3645       existing_node = cfg.GetNodeInfo(existing_node_name)
3646
3647       if self.op.readd and node == existing_node_name:
3648         if existing_node.secondary_ip != secondary_ip:
3649           raise errors.OpPrereqError("Readded node doesn't have the same IP"
3650                                      " address configuration as before",
3651                                      errors.ECODE_INVAL)
3652         if existing_node.primary_ip != primary_ip:
3653           self.changed_primary_ip = True
3654
3655         continue
3656
3657       if (existing_node.primary_ip == primary_ip or
3658           existing_node.secondary_ip == primary_ip or
3659           existing_node.primary_ip == secondary_ip or
3660           existing_node.secondary_ip == secondary_ip):
3661         raise errors.OpPrereqError("New node ip address(es) conflict with"
3662                                    " existing node %s" % existing_node.name,
3663                                    errors.ECODE_NOTUNIQUE)
3664
3665     # check that the type of the node (single versus dual homed) is the
3666     # same as for the master
3667     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3668     master_singlehomed = myself.secondary_ip == myself.primary_ip
3669     newbie_singlehomed = secondary_ip == primary_ip
3670     if master_singlehomed != newbie_singlehomed:
3671       if master_singlehomed:
3672         raise errors.OpPrereqError("The master has no private ip but the"
3673                                    " new node has one",
3674                                    errors.ECODE_INVAL)
3675       else:
3676         raise errors.OpPrereqError("The master has a private ip but the"
3677                                    " new node doesn't have one",
3678                                    errors.ECODE_INVAL)
3679
3680     # checks reachability
3681     if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3682       raise errors.OpPrereqError("Node not reachable by ping",
3683                                  errors.ECODE_ENVIRON)
3684
3685     if not newbie_singlehomed:
3686       # check reachability from my secondary ip to newbie's secondary ip
3687       if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3688                            source=myself.secondary_ip):
3689         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3690                                    " based ping to noded port",
3691                                    errors.ECODE_ENVIRON)
3692
3693     if self.op.readd:
3694       exceptions = [node]
3695     else:
3696       exceptions = []
3697
3698     self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3699
3700     if self.op.readd:
3701       self.new_node = self.cfg.GetNodeInfo(node)
3702       assert self.new_node is not None, "Can't retrieve locked node %s" % node
3703     else:
3704       self.new_node = objects.Node(name=node,
3705                                    primary_ip=primary_ip,
3706                                    secondary_ip=secondary_ip,
3707                                    master_candidate=self.master_candidate,
3708                                    offline=False, drained=False)
3709
3710   def Exec(self, feedback_fn):
3711     """Adds the new node to the cluster.
3712
3713     """
3714     new_node = self.new_node
3715     node = new_node.name
3716
3717     # for re-adds, reset the offline/drained/master-candidate flags;
3718     # we need to reset here, otherwise offline would prevent RPC calls
3719     # later in the procedure; this also means that if the re-add
3720     # fails, we are left with a non-offlined, broken node
3721     if self.op.readd:
3722       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3723       self.LogInfo("Readding a node, the offline/drained flags were reset")
3724       # if we demote the node, we do cleanup later in the procedure
3725       new_node.master_candidate = self.master_candidate
3726       if self.changed_primary_ip:
3727         new_node.primary_ip = self.op.primary_ip
3728
3729     # notify the user about any possible mc promotion
3730     if new_node.master_candidate:
3731       self.LogInfo("Node will be a master candidate")
3732
3733     # check connectivity
3734     result = self.rpc.call_version([node])[node]
3735     result.Raise("Can't get version information from node %s" % node)
3736     if constants.PROTOCOL_VERSION == result.payload:
3737       logging.info("Communication to node %s fine, sw version %s match",
3738                    node, result.payload)
3739     else:
3740       raise errors.OpExecError("Version mismatch master version %s,"
3741                                " node version %s" %
3742                                (constants.PROTOCOL_VERSION, result.payload))
3743
3744     # setup ssh on node
3745     if self.cfg.GetClusterInfo().modify_ssh_setup:
3746       logging.info("Copy ssh key to node %s", node)
3747       priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3748       keyarray = []
3749       keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3750                   constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3751                   priv_key, pub_key]
3752
3753       for i in keyfiles:
3754         keyarray.append(utils.ReadFile(i))
3755
3756       result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3757                                       keyarray[2], keyarray[3], keyarray[4],
3758                                       keyarray[5])
3759       result.Raise("Cannot transfer ssh keys to the new node")
3760
3761     # Add node to our /etc/hosts, and add key to known_hosts
3762     if self.cfg.GetClusterInfo().modify_etc_hosts:
3763       # FIXME: this should be done via an rpc call to node daemon
3764       utils.AddHostToEtcHosts(new_node.name)
3765
3766     if new_node.secondary_ip != new_node.primary_ip:
3767       result = self.rpc.call_node_has_ip_address(new_node.name,
3768                                                  new_node.secondary_ip)
3769       result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3770                    prereq=True, ecode=errors.ECODE_ENVIRON)
3771       if not result.payload:
3772         raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3773                                  " you gave (%s). Please fix and re-run this"
3774                                  " command." % new_node.secondary_ip)
3775
3776     node_verify_list = [self.cfg.GetMasterNode()]
3777     node_verify_param = {
3778       constants.NV_NODELIST: [node],
3779       # TODO: do a node-net-test as well?
3780     }
3781
3782     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3783                                        self.cfg.GetClusterName())
3784     for verifier in node_verify_list:
3785       result[verifier].Raise("Cannot communicate with node %s" % verifier)
3786       nl_payload = result[verifier].payload[constants.NV_NODELIST]
3787       if nl_payload:
3788         for failed in nl_payload:
3789           feedback_fn("ssh/hostname verification failed"
3790                       " (checking from %s): %s" %
3791                       (verifier, nl_payload[failed]))
3792         raise errors.OpExecError("ssh/hostname verification failed.")
3793
3794     if self.op.readd:
3795       _RedistributeAncillaryFiles(self)
3796       self.context.ReaddNode(new_node)
3797       # make sure we redistribute the config
3798       self.cfg.Update(new_node, feedback_fn)
3799       # and make sure the new node will not have old files around
3800       if not new_node.master_candidate:
3801         result = self.rpc.call_node_demote_from_mc(new_node.name)
3802         msg = result.fail_msg
3803         if msg:
3804           self.LogWarning("Node failed to demote itself from master"
3805                           " candidate status: %s" % msg)
3806     else:
3807       _RedistributeAncillaryFiles(self, additional_nodes=[node])
3808       self.context.AddNode(new_node, self.proc.GetECId())
3809
3810
3811 class LUSetNodeParams(LogicalUnit):
3812   """Modifies the parameters of a node.
3813
3814   """
3815   HPATH = "node-modify"
3816   HTYPE = constants.HTYPE_NODE
3817   _OP_PARAMS = [
3818     _PNodeName,
3819     ("master_candidate", None, _TMaybeBool),
3820     ("offline", None, _TMaybeBool),
3821     ("drained", None, _TMaybeBool),
3822     ("auto_promote", False, _TBool),
3823     _PForce,
3824     ]
3825   REQ_BGL = False
3826
3827   def CheckArguments(self):
3828     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3829     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3830     if all_mods.count(None) == 3:
3831       raise errors.OpPrereqError("Please pass at least one modification",
3832                                  errors.ECODE_INVAL)
3833     if all_mods.count(True) > 1:
3834       raise errors.OpPrereqError("Can't set the node into more than one"
3835                                  " state at the same time",
3836                                  errors.ECODE_INVAL)
3837
3838     # Boolean value that tells us whether we're offlining or draining the node
3839     self.offline_or_drain = (self.op.offline == True or
3840                              self.op.drained == True)
3841     self.deoffline_or_drain = (self.op.offline == False or
3842                                self.op.drained == False)
3843     self.might_demote = (self.op.master_candidate == False or
3844                          self.offline_or_drain)
3845
3846     self.lock_all = self.op.auto_promote and self.might_demote
3847
3848
3849   def ExpandNames(self):
3850     if self.lock_all:
3851       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3852     else:
3853       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3854
3855   def BuildHooksEnv(self):
3856     """Build hooks env.
3857
3858     This runs on the master node.
3859
3860     """
3861     env = {
3862       "OP_TARGET": self.op.node_name,
3863       "MASTER_CANDIDATE": str(self.op.master_candidate),
3864       "OFFLINE": str(self.op.offline),
3865       "DRAINED": str(self.op.drained),
3866       }
3867     nl = [self.cfg.GetMasterNode(),
3868           self.op.node_name]
3869     return env, nl, nl
3870
3871   def CheckPrereq(self):
3872     """Check prerequisites.
3873
3874     This only checks the instance list against the existing names.
3875
3876     """
3877     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3878
3879     if (self.op.master_candidate is not None or
3880         self.op.drained is not None or
3881         self.op.offline is not None):
3882       # we can't change the master's node flags
3883       if self.op.node_name == self.cfg.GetMasterNode():
3884         raise errors.OpPrereqError("The master role can be changed"
3885                                    " only via masterfailover",
3886                                    errors.ECODE_INVAL)
3887
3888
3889     if node.master_candidate and self.might_demote and not self.lock_all:
3890       assert not self.op.auto_promote, "auto-promote set but lock_all not"
3891       # check if after removing the current node, we're missing master
3892       # candidates
3893       (mc_remaining, mc_should, _) = \
3894           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3895       if mc_remaining < mc_should:
3896         raise errors.OpPrereqError("Not enough master candidates, please"
3897                                    " pass auto_promote to allow promotion",
3898                                    errors.ECODE_INVAL)
3899
3900     if (self.op.master_candidate == True and
3901         ((node.offline and not self.op.offline == False) or
3902          (node.drained and not self.op.drained == False))):
3903       raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3904                                  " to master_candidate" % node.name,
3905                                  errors.ECODE_INVAL)
3906
3907     # If we're being deofflined/drained, we'll MC ourself if needed
3908     if (self.deoffline_or_drain and not self.offline_or_drain and not
3909         self.op.master_candidate == True and not node.master_candidate):
3910       self.op.master_candidate = _DecideSelfPromotion(self)
3911       if self.op.master_candidate:
3912         self.LogInfo("Autopromoting node to master candidate")
3913
3914     return
3915
3916   def Exec(self, feedback_fn):
3917     """Modifies a node.
3918
3919     """
3920     node = self.node
3921
3922     result = []
3923     changed_mc = False
3924
3925     if self.op.offline is not None:
3926       node.offline = self.op.offline
3927       result.append(("offline", str(self.op.offline)))
3928       if self.op.offline == True:
3929         if node.master_candidate:
3930           node.master_candidate = False
3931           changed_mc = True
3932           result.append(("master_candidate", "auto-demotion due to offline"))
3933         if node.drained:
3934           node.drained = False
3935           result.append(("drained", "clear drained status due to offline"))
3936
3937     if self.op.master_candidate is not None:
3938       node.master_candidate = self.op.master_candidate
3939       changed_mc = True
3940       result.append(("master_candidate", str(self.op.master_candidate)))
3941       if self.op.master_candidate == False:
3942         rrc = self.rpc.call_node_demote_from_mc(node.name)
3943         msg = rrc.fail_msg
3944         if msg:
3945           self.LogWarning("Node failed to demote itself: %s" % msg)
3946
3947     if self.op.drained is not None:
3948       node.drained = self.op.drained
3949       result.append(("drained", str(self.op.drained)))
3950       if self.op.drained == True:
3951         if node.master_candidate:
3952           node.master_candidate = False
3953           changed_mc = True
3954           result.append(("master_candidate", "auto-demotion due to drain"))
3955           rrc = self.rpc.call_node_demote_from_mc(node.name)
3956           msg = rrc.fail_msg
3957           if msg:
3958             self.LogWarning("Node failed to demote itself: %s" % msg)
3959         if node.offline:
3960           node.offline = False
3961           result.append(("offline", "clear offline status due to drain"))
3962
3963     # we locked all nodes, we adjust the CP before updating this node
3964     if self.lock_all:
3965       _AdjustCandidatePool(self, [node.name])
3966
3967     # this will trigger configuration file update, if needed
3968     self.cfg.Update(node, feedback_fn)
3969
3970     # this will trigger job queue propagation or cleanup
3971     if changed_mc:
3972       self.context.ReaddNode(node)
3973
3974     return result
3975
3976
3977 class LUPowercycleNode(NoHooksLU):
3978   """Powercycles a node.
3979
3980   """
3981   _OP_PARAMS = [
3982     _PNodeName,
3983     _PForce,
3984     ]
3985   REQ_BGL = False
3986
3987   def CheckArguments(self):
3988     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3989     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3990       raise errors.OpPrereqError("The node is the master and the force"
3991                                  " parameter was not set",
3992                                  errors.ECODE_INVAL)
3993
3994   def ExpandNames(self):
3995     """Locking for PowercycleNode.
3996
3997     This is a last-resort option and shouldn't block on other
3998     jobs. Therefore, we grab no locks.
3999
4000     """
4001     self.needed_locks = {}
4002
4003   def Exec(self, feedback_fn):
4004     """Reboots a node.
4005
4006     """
4007     result = self.rpc.call_node_powercycle(self.op.node_name,
4008                                            self.cfg.GetHypervisorType())
4009     result.Raise("Failed to schedule the reboot")
4010     return result.payload
4011
4012
4013 class LUQueryClusterInfo(NoHooksLU):
4014   """Query cluster configuration.
4015
4016   """
4017   REQ_BGL = False
4018
4019   def ExpandNames(self):
4020     self.needed_locks = {}
4021
4022   def Exec(self, feedback_fn):
4023     """Return cluster config.
4024
4025     """
4026     cluster = self.cfg.GetClusterInfo()
4027     os_hvp = {}
4028
4029     # Filter just for enabled hypervisors
4030     for os_name, hv_dict in cluster.os_hvp.items():
4031       os_hvp[os_name] = {}
4032       for hv_name, hv_params in hv_dict.items():
4033         if hv_name in cluster.enabled_hypervisors:
4034           os_hvp[os_name][hv_name] = hv_params
4035
4036     result = {
4037       "software_version": constants.RELEASE_VERSION,
4038       "protocol_version": constants.PROTOCOL_VERSION,
4039       "config_version": constants.CONFIG_VERSION,
4040       "os_api_version": max(constants.OS_API_VERSIONS),
4041       "export_version": constants.EXPORT_VERSION,
4042       "architecture": (platform.architecture()[0], platform.machine()),
4043       "name": cluster.cluster_name,
4044       "master": cluster.master_node,
4045       "default_hypervisor": cluster.enabled_hypervisors[0],
4046       "enabled_hypervisors": cluster.enabled_hypervisors,
4047       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4048                         for hypervisor_name in cluster.enabled_hypervisors]),
4049       "os_hvp": os_hvp,
4050       "beparams": cluster.beparams,
4051       "osparams": cluster.osparams,
4052       "nicparams": cluster.nicparams,
4053       "candidate_pool_size": cluster.candidate_pool_size,
4054       "master_netdev": cluster.master_netdev,
4055       "volume_group_name": cluster.volume_group_name,
4056       "file_storage_dir": cluster.file_storage_dir,
4057       "maintain_node_health": cluster.maintain_node_health,
4058       "ctime": cluster.ctime,
4059       "mtime": cluster.mtime,
4060       "uuid": cluster.uuid,
4061       "tags": list(cluster.GetTags()),
4062       "uid_pool": cluster.uid_pool,
4063       }
4064
4065     return result
4066
4067
4068 class LUQueryConfigValues(NoHooksLU):
4069   """Return configuration values.
4070
4071   """
4072   _OP_PARAMS = [_POutputFields]
4073   REQ_BGL = False
4074   _FIELDS_DYNAMIC = utils.FieldSet()
4075   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4076                                   "watcher_pause")
4077
4078   def CheckArguments(self):
4079     _CheckOutputFields(static=self._FIELDS_STATIC,
4080                        dynamic=self._FIELDS_DYNAMIC,
4081                        selected=self.op.output_fields)
4082
4083   def ExpandNames(self):
4084     self.needed_locks = {}
4085
4086   def Exec(self, feedback_fn):
4087     """Dump a representation of the cluster config to the standard output.
4088
4089     """
4090     values = []
4091     for field in self.op.output_fields:
4092       if field == "cluster_name":
4093         entry = self.cfg.GetClusterName()
4094       elif field == "master_node":
4095         entry = self.cfg.GetMasterNode()
4096       elif field == "drain_flag":
4097         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4098       elif field == "watcher_pause":
4099         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4100       else:
4101         raise errors.ParameterError(field)
4102       values.append(entry)
4103     return values
4104
4105
4106 class LUActivateInstanceDisks(NoHooksLU):
4107   """Bring up an instance's disks.
4108
4109   """
4110   _OP_PARAMS = [
4111     _PInstanceName,
4112     ("ignore_size", False, _TBool),
4113     ]
4114   REQ_BGL = False
4115
4116   def ExpandNames(self):
4117     self._ExpandAndLockInstance()
4118     self.needed_locks[locking.LEVEL_NODE] = []
4119     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4120
4121   def DeclareLocks(self, level):
4122     if level == locking.LEVEL_NODE:
4123       self._LockInstancesNodes()
4124
4125   def CheckPrereq(self):
4126     """Check prerequisites.
4127
4128     This checks that the instance is in the cluster.
4129
4130     """
4131     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4132     assert self.instance is not None, \
4133       "Cannot retrieve locked instance %s" % self.op.instance_name
4134     _CheckNodeOnline(self, self.instance.primary_node)
4135
4136   def Exec(self, feedback_fn):
4137     """Activate the disks.
4138
4139     """
4140     disks_ok, disks_info = \
4141               _AssembleInstanceDisks(self, self.instance,
4142                                      ignore_size=self.op.ignore_size)
4143     if not disks_ok:
4144       raise errors.OpExecError("Cannot activate block devices")
4145
4146     return disks_info
4147
4148
4149 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4150                            ignore_size=False):
4151   """Prepare the block devices for an instance.
4152
4153   This sets up the block devices on all nodes.
4154
4155   @type lu: L{LogicalUnit}
4156   @param lu: the logical unit on whose behalf we execute
4157   @type instance: L{objects.Instance}
4158   @param instance: the instance for whose disks we assemble
4159   @type disks: list of L{objects.Disk} or None
4160   @param disks: which disks to assemble (or all, if None)
4161   @type ignore_secondaries: boolean
4162   @param ignore_secondaries: if true, errors on secondary nodes
4163       won't result in an error return from the function
4164   @type ignore_size: boolean
4165   @param ignore_size: if true, the current known size of the disk
4166       will not be used during the disk activation, useful for cases
4167       when the size is wrong
4168   @return: False if the operation failed, otherwise a list of
4169       (host, instance_visible_name, node_visible_name)
4170       with the mapping from node devices to instance devices
4171
4172   """
4173   device_info = []
4174   disks_ok = True
4175   iname = instance.name
4176   disks = _ExpandCheckDisks(instance, disks)
4177
4178   # With the two passes mechanism we try to reduce the window of
4179   # opportunity for the race condition of switching DRBD to primary
4180   # before handshaking occured, but we do not eliminate it
4181
4182   # The proper fix would be to wait (with some limits) until the
4183   # connection has been made and drbd transitions from WFConnection
4184   # into any other network-connected state (Connected, SyncTarget,
4185   # SyncSource, etc.)
4186
4187   # 1st pass, assemble on all nodes in secondary mode
4188   for inst_disk in disks:
4189     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4190       if ignore_size:
4191         node_disk = node_disk.Copy()
4192         node_disk.UnsetSize()
4193       lu.cfg.SetDiskID(node_disk, node)
4194       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4195       msg = result.fail_msg
4196       if msg:
4197         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4198                            " (is_primary=False, pass=1): %s",
4199                            inst_disk.iv_name, node, msg)
4200         if not ignore_secondaries:
4201           disks_ok = False
4202
4203   # FIXME: race condition on drbd migration to primary
4204
4205   # 2nd pass, do only the primary node
4206   for inst_disk in disks:
4207     dev_path = None
4208
4209     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4210       if node != instance.primary_node:
4211         continue
4212       if ignore_size:
4213         node_disk = node_disk.Copy()
4214         node_disk.UnsetSize()
4215       lu.cfg.SetDiskID(node_disk, node)
4216       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4217       msg = result.fail_msg
4218       if msg:
4219         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4220                            " (is_primary=True, pass=2): %s",
4221                            inst_disk.iv_name, node, msg)
4222         disks_ok = False
4223       else:
4224         dev_path = result.payload
4225
4226     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4227
4228   # leave the disks configured for the primary node
4229   # this is a workaround that would be fixed better by
4230   # improving the logical/physical id handling
4231   for disk in disks:
4232     lu.cfg.SetDiskID(disk, instance.primary_node)
4233
4234   return disks_ok, device_info
4235
4236
4237 def _StartInstanceDisks(lu, instance, force):
4238   """Start the disks of an instance.
4239
4240   """
4241   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4242                                            ignore_secondaries=force)
4243   if not disks_ok:
4244     _ShutdownInstanceDisks(lu, instance)
4245     if force is not None and not force:
4246       lu.proc.LogWarning("", hint="If the message above refers to a"
4247                          " secondary node,"
4248                          " you can retry the operation using '--force'.")
4249     raise errors.OpExecError("Disk consistency error")
4250
4251
4252 class LUDeactivateInstanceDisks(NoHooksLU):
4253   """Shutdown an instance's disks.
4254
4255   """
4256   _OP_PARAMS = [
4257     _PInstanceName,
4258     ]
4259   REQ_BGL = False
4260
4261   def ExpandNames(self):
4262     self._ExpandAndLockInstance()
4263     self.needed_locks[locking.LEVEL_NODE] = []
4264     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4265
4266   def DeclareLocks(self, level):
4267     if level == locking.LEVEL_NODE:
4268       self._LockInstancesNodes()
4269
4270   def CheckPrereq(self):
4271     """Check prerequisites.
4272
4273     This checks that the instance is in the cluster.
4274
4275     """
4276     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4277     assert self.instance is not None, \
4278       "Cannot retrieve locked instance %s" % self.op.instance_name
4279
4280   def Exec(self, feedback_fn):
4281     """Deactivate the disks
4282
4283     """
4284     instance = self.instance
4285     _SafeShutdownInstanceDisks(self, instance)
4286
4287
4288 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4289   """Shutdown block devices of an instance.
4290
4291   This function checks if an instance is running, before calling
4292   _ShutdownInstanceDisks.
4293
4294   """
4295   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4296   _ShutdownInstanceDisks(lu, instance, disks=disks)
4297
4298
4299 def _ExpandCheckDisks(instance, disks):
4300   """Return the instance disks selected by the disks list
4301
4302   @type disks: list of L{objects.Disk} or None
4303   @param disks: selected disks
4304   @rtype: list of L{objects.Disk}
4305   @return: selected instance disks to act on
4306
4307   """
4308   if disks is None:
4309     return instance.disks
4310   else:
4311     if not set(disks).issubset(instance.disks):
4312       raise errors.ProgrammerError("Can only act on disks belonging to the"
4313                                    " target instance")
4314     return disks
4315
4316
4317 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4318   """Shutdown block devices of an instance.
4319
4320   This does the shutdown on all nodes of the instance.
4321
4322   If the ignore_primary is false, errors on the primary node are
4323   ignored.
4324
4325   """
4326   all_result = True
4327   disks = _ExpandCheckDisks(instance, disks)
4328
4329   for disk in disks:
4330     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4331       lu.cfg.SetDiskID(top_disk, node)
4332       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4333       msg = result.fail_msg
4334       if msg:
4335         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4336                       disk.iv_name, node, msg)
4337         if not ignore_primary or node != instance.primary_node:
4338           all_result = False
4339   return all_result
4340
4341
4342 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4343   """Checks if a node has enough free memory.
4344
4345   This function check if a given node has the needed amount of free
4346   memory. In case the node has less memory or we cannot get the
4347   information from the node, this function raise an OpPrereqError
4348   exception.
4349
4350   @type lu: C{LogicalUnit}
4351   @param lu: a logical unit from which we get configuration data
4352   @type node: C{str}
4353   @param node: the node to check
4354   @type reason: C{str}
4355   @param reason: string to use in the error message
4356   @type requested: C{int}
4357   @param requested: the amount of memory in MiB to check for
4358   @type hypervisor_name: C{str}
4359   @param hypervisor_name: the hypervisor to ask for memory stats
4360   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4361       we cannot check the node
4362
4363   """
4364   nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4365   nodeinfo[node].Raise("Can't get data from node %s" % node,
4366                        prereq=True, ecode=errors.ECODE_ENVIRON)
4367   free_mem = nodeinfo[node].payload.get('memory_free', None)
4368   if not isinstance(free_mem, int):
4369     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4370                                " was '%s'" % (node, free_mem),
4371                                errors.ECODE_ENVIRON)
4372   if requested > free_mem:
4373     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4374                                " needed %s MiB, available %s MiB" %
4375                                (node, reason, requested, free_mem),
4376                                errors.ECODE_NORES)
4377
4378
4379 def _CheckNodesFreeDisk(lu, nodenames, requested):
4380   """Checks if nodes have enough free disk space in the default VG.
4381
4382   This function check if all given nodes have the needed amount of
4383   free disk. In case any node has less disk or we cannot get the
4384   information from the node, this function raise an OpPrereqError
4385   exception.
4386
4387   @type lu: C{LogicalUnit}
4388   @param lu: a logical unit from which we get configuration data
4389   @type nodenames: C{list}
4390   @param nodenames: the list of node names to check
4391   @type requested: C{int}
4392   @param requested: the amount of disk in MiB to check for
4393   @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4394       we cannot check the node
4395
4396   """
4397   nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4398                                    lu.cfg.GetHypervisorType())
4399   for node in nodenames:
4400     info = nodeinfo[node]
4401     info.Raise("Cannot get current information from node %s" % node,
4402                prereq=True, ecode=errors.ECODE_ENVIRON)
4403     vg_free = info.payload.get("vg_free", None)
4404     if not isinstance(vg_free, int):
4405       raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4406                                  " result was '%s'" % (node, vg_free),
4407                                  errors.ECODE_ENVIRON)
4408     if requested > vg_free:
4409       raise errors.OpPrereqError("Not enough disk space on target node %s:"
4410                                  " required %d MiB, available %d MiB" %
4411                                  (node, requested, vg_free),
4412                                  errors.ECODE_NORES)
4413
4414
4415 class LUStartupInstance(LogicalUnit):
4416   """Starts an instance.
4417
4418   """
4419   HPATH = "instance-start"
4420   HTYPE = constants.HTYPE_INSTANCE
4421   _OP_PARAMS = [
4422     _PInstanceName,
4423     _PForce,
4424     ("hvparams", _EmptyDict, _TDict),
4425     ("beparams", _EmptyDict, _TDict),
4426     ]
4427   REQ_BGL = False
4428
4429   def CheckArguments(self):
4430     # extra beparams
4431     if self.op.beparams:
4432       # fill the beparams dict
4433       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4434
4435   def ExpandNames(self):
4436     self._ExpandAndLockInstance()
4437
4438   def BuildHooksEnv(self):
4439     """Build hooks env.
4440
4441     This runs on master, primary and secondary nodes of the instance.
4442
4443     """
4444     env = {
4445       "FORCE": self.op.force,
4446       }
4447     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4448     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4449     return env, nl, nl
4450
4451   def CheckPrereq(self):
4452     """Check prerequisites.
4453
4454     This checks that the instance is in the cluster.
4455
4456     """
4457     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4458     assert self.instance is not None, \
4459       "Cannot retrieve locked instance %s" % self.op.instance_name
4460
4461     # extra hvparams
4462     if self.op.hvparams:
4463       # check hypervisor parameter syntax (locally)
4464       cluster = self.cfg.GetClusterInfo()
4465       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4466       filled_hvp = cluster.FillHV(instance)
4467       filled_hvp.update(self.op.hvparams)
4468       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4469       hv_type.CheckParameterSyntax(filled_hvp)
4470       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4471
4472     _CheckNodeOnline(self, instance.primary_node)
4473
4474     bep = self.cfg.GetClusterInfo().FillBE(instance)
4475     # check bridges existence
4476     _CheckInstanceBridgesExist(self, instance)
4477
4478     remote_info = self.rpc.call_instance_info(instance.primary_node,
4479                                               instance.name,
4480                                               instance.hypervisor)
4481     remote_info.Raise("Error checking node %s" % instance.primary_node,
4482                       prereq=True, ecode=errors.ECODE_ENVIRON)
4483     if not remote_info.payload: # not running already
4484       _CheckNodeFreeMemory(self, instance.primary_node,
4485                            "starting instance %s" % instance.name,
4486                            bep[constants.BE_MEMORY], instance.hypervisor)
4487
4488   def Exec(self, feedback_fn):
4489     """Start the instance.
4490
4491     """
4492     instance = self.instance
4493     force = self.op.force
4494
4495     self.cfg.MarkInstanceUp(instance.name)
4496
4497     node_current = instance.primary_node
4498
4499     _StartInstanceDisks(self, instance, force)
4500
4501     result = self.rpc.call_instance_start(node_current, instance,
4502                                           self.op.hvparams, self.op.beparams)
4503     msg = result.fail_msg
4504     if msg:
4505       _ShutdownInstanceDisks(self, instance)
4506       raise errors.OpExecError("Could not start instance: %s" % msg)
4507
4508
4509 class LURebootInstance(LogicalUnit):
4510   """Reboot an instance.
4511
4512   """
4513   HPATH = "instance-reboot"
4514   HTYPE = constants.HTYPE_INSTANCE
4515   _OP_PARAMS = [
4516     _PInstanceName,
4517     ("ignore_secondaries", False, _TBool),
4518     ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4519     _PShutdownTimeout,
4520     ]
4521   REQ_BGL = False
4522
4523   def ExpandNames(self):
4524     self._ExpandAndLockInstance()
4525
4526   def BuildHooksEnv(self):
4527     """Build hooks env.
4528
4529     This runs on master, primary and secondary nodes of the instance.
4530
4531     """
4532     env = {
4533       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4534       "REBOOT_TYPE": self.op.reboot_type,
4535       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4536       }
4537     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4538     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4539     return env, nl, nl
4540
4541   def CheckPrereq(self):
4542     """Check prerequisites.
4543
4544     This checks that the instance is in the cluster.
4545
4546     """
4547     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4548     assert self.instance is not None, \
4549       "Cannot retrieve locked instance %s" % self.op.instance_name
4550
4551     _CheckNodeOnline(self, instance.primary_node)
4552
4553     # check bridges existence
4554     _CheckInstanceBridgesExist(self, instance)
4555
4556   def Exec(self, feedback_fn):
4557     """Reboot the instance.
4558
4559     """
4560     instance = self.instance
4561     ignore_secondaries = self.op.ignore_secondaries
4562     reboot_type = self.op.reboot_type
4563
4564     node_current = instance.primary_node
4565
4566     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4567                        constants.INSTANCE_REBOOT_HARD]:
4568       for disk in instance.disks:
4569         self.cfg.SetDiskID(disk, node_current)
4570       result = self.rpc.call_instance_reboot(node_current, instance,
4571                                              reboot_type,
4572                                              self.op.shutdown_timeout)
4573       result.Raise("Could not reboot instance")
4574     else:
4575       result = self.rpc.call_instance_shutdown(node_current, instance,
4576                                                self.op.shutdown_timeout)
4577       result.Raise("Could not shutdown instance for full reboot")
4578       _ShutdownInstanceDisks(self, instance)
4579       _StartInstanceDisks(self, instance, ignore_secondaries)
4580       result = self.rpc.call_instance_start(node_current, instance, None, None)
4581       msg = result.fail_msg
4582       if msg:
4583         _ShutdownInstanceDisks(self, instance)
4584         raise errors.OpExecError("Could not start instance for"
4585                                  " full reboot: %s" % msg)
4586
4587     self.cfg.MarkInstanceUp(instance.name)
4588
4589
4590 class LUShutdownInstance(LogicalUnit):
4591   """Shutdown an instance.
4592
4593   """
4594   HPATH = "instance-stop"
4595   HTYPE = constants.HTYPE_INSTANCE
4596   _OP_PARAMS = [
4597     _PInstanceName,
4598     ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4599     ]
4600   REQ_BGL = False
4601
4602   def ExpandNames(self):
4603     self._ExpandAndLockInstance()
4604
4605   def BuildHooksEnv(self):
4606     """Build hooks env.
4607
4608     This runs on master, primary and secondary nodes of the instance.
4609
4610     """
4611     env = _BuildInstanceHookEnvByObject(self, self.instance)
4612     env["TIMEOUT"] = self.op.timeout
4613     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4614     return env, nl, nl
4615
4616   def CheckPrereq(self):
4617     """Check prerequisites.
4618
4619     This checks that the instance is in the cluster.
4620
4621     """
4622     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4623     assert self.instance is not None, \
4624       "Cannot retrieve locked instance %s" % self.op.instance_name
4625     _CheckNodeOnline(self, self.instance.primary_node)
4626
4627   def Exec(self, feedback_fn):
4628     """Shutdown the instance.
4629
4630     """
4631     instance = self.instance
4632     node_current = instance.primary_node
4633     timeout = self.op.timeout
4634     self.cfg.MarkInstanceDown(instance.name)
4635     result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4636     msg = result.fail_msg
4637     if msg:
4638       self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4639
4640     _ShutdownInstanceDisks(self, instance)
4641
4642
4643 class LUReinstallInstance(LogicalUnit):
4644   """Reinstall an instance.
4645
4646   """
4647   HPATH = "instance-reinstall"
4648   HTYPE = constants.HTYPE_INSTANCE
4649   _OP_PARAMS = [
4650     _PInstanceName,
4651     ("os_type", None, _TMaybeString),
4652     ("force_variant", False, _TBool),
4653     ]
4654   REQ_BGL = False
4655
4656   def ExpandNames(self):
4657     self._ExpandAndLockInstance()
4658
4659   def BuildHooksEnv(self):
4660     """Build hooks env.
4661
4662     This runs on master, primary and secondary nodes of the instance.
4663
4664     """
4665     env = _BuildInstanceHookEnvByObject(self, self.instance)
4666     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4667     return env, nl, nl
4668
4669   def CheckPrereq(self):
4670     """Check prerequisites.
4671
4672     This checks that the instance is in the cluster and is not running.
4673
4674     """
4675     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4676     assert instance is not None, \
4677       "Cannot retrieve locked instance %s" % self.op.instance_name
4678     _CheckNodeOnline(self, instance.primary_node)
4679
4680     if instance.disk_template == constants.DT_DISKLESS:
4681       raise errors.OpPrereqError("Instance '%s' has no disks" %
4682                                  self.op.instance_name,
4683                                  errors.ECODE_INVAL)
4684     _CheckInstanceDown(self, instance, "cannot reinstall")
4685
4686     if self.op.os_type is not None:
4687       # OS verification
4688       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4689       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4690
4691     self.instance = instance
4692
4693   def Exec(self, feedback_fn):
4694     """Reinstall the instance.
4695
4696     """
4697     inst = self.instance
4698
4699     if self.op.os_type is not None:
4700       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4701       inst.os = self.op.os_type
4702       self.cfg.Update(inst, feedback_fn)
4703
4704     _StartInstanceDisks(self, inst, None)
4705     try:
4706       feedback_fn("Running the instance OS create scripts...")
4707       # FIXME: pass debug option from opcode to backend
4708       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4709                                              self.op.debug_level)
4710       result.Raise("Could not install OS for instance %s on node %s" %
4711                    (inst.name, inst.primary_node))
4712     finally:
4713       _ShutdownInstanceDisks(self, inst)
4714
4715
4716 class LURecreateInstanceDisks(LogicalUnit):
4717   """Recreate an instance's missing disks.
4718
4719   """
4720   HPATH = "instance-recreate-disks"
4721   HTYPE = constants.HTYPE_INSTANCE
4722   _OP_PARAMS = [
4723     _PInstanceName,
4724     ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4725     ]
4726   REQ_BGL = False
4727
4728   def ExpandNames(self):
4729     self._ExpandAndLockInstance()
4730
4731   def BuildHooksEnv(self):
4732     """Build hooks env.
4733
4734     This runs on master, primary and secondary nodes of the instance.
4735
4736     """
4737     env = _BuildInstanceHookEnvByObject(self, self.instance)
4738     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4739     return env, nl, nl
4740
4741   def CheckPrereq(self):
4742     """Check prerequisites.
4743
4744     This checks that the instance is in the cluster and is not running.
4745
4746     """
4747     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4748     assert instance is not None, \
4749       "Cannot retrieve locked instance %s" % self.op.instance_name
4750     _CheckNodeOnline(self, instance.primary_node)
4751
4752     if instance.disk_template == constants.DT_DISKLESS:
4753       raise errors.OpPrereqError("Instance '%s' has no disks" %
4754                                  self.op.instance_name, errors.ECODE_INVAL)
4755     _CheckInstanceDown(self, instance, "cannot recreate disks")
4756
4757     if not self.op.disks:
4758       self.op.disks = range(len(instance.disks))
4759     else:
4760       for idx in self.op.disks:
4761         if idx >= len(instance.disks):
4762           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4763                                      errors.ECODE_INVAL)
4764
4765     self.instance = instance
4766
4767   def Exec(self, feedback_fn):
4768     """Recreate the disks.
4769
4770     """
4771     to_skip = []
4772     for idx, _ in enumerate(self.instance.disks):
4773       if idx not in self.op.disks: # disk idx has not been passed in
4774         to_skip.append(idx)
4775         continue
4776
4777     _CreateDisks(self, self.instance, to_skip=to_skip)
4778
4779
4780 class LURenameInstance(LogicalUnit):
4781   """Rename an instance.
4782
4783   """
4784   HPATH = "instance-rename"
4785   HTYPE = constants.HTYPE_INSTANCE
4786   _OP_PARAMS = [
4787     _PInstanceName,
4788     ("new_name", _NoDefault, _TNonEmptyString),
4789     ("ignore_ip", False, _TBool),
4790     ("check_name", True, _TBool),
4791     ]
4792
4793   def BuildHooksEnv(self):
4794     """Build hooks env.
4795
4796     This runs on master, primary and secondary nodes of the instance.
4797
4798     """
4799     env = _BuildInstanceHookEnvByObject(self, self.instance)
4800     env["INSTANCE_NEW_NAME"] = self.op.new_name
4801     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4802     return env, nl, nl
4803
4804   def CheckPrereq(self):
4805     """Check prerequisites.
4806
4807     This checks that the instance is in the cluster and is not running.
4808
4809     """
4810     self.op.instance_name = _ExpandInstanceName(self.cfg,
4811                                                 self.op.instance_name)
4812     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4813     assert instance is not None
4814     _CheckNodeOnline(self, instance.primary_node)
4815     _CheckInstanceDown(self, instance, "cannot rename")
4816     self.instance = instance
4817
4818     # new name verification
4819     if self.op.check_name:
4820       name_info = utils.GetHostInfo(self.op.new_name)
4821       self.op.new_name = name_info.name
4822
4823     new_name = self.op.new_name
4824
4825     instance_list = self.cfg.GetInstanceList()
4826     if new_name in instance_list:
4827       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4828                                  new_name, errors.ECODE_EXISTS)
4829
4830     if not self.op.ignore_ip:
4831       if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4832         raise errors.OpPrereqError("IP %s of instance %s already in use" %
4833                                    (name_info.ip, new_name),
4834                                    errors.ECODE_NOTUNIQUE)
4835
4836   def Exec(self, feedback_fn):
4837     """Reinstall the instance.
4838
4839     """
4840     inst = self.instance
4841     old_name = inst.name
4842
4843     if inst.disk_template == constants.DT_FILE:
4844       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4845
4846     self.cfg.RenameInstance(inst.name, self.op.new_name)
4847     # Change the instance lock. This is definitely safe while we hold the BGL
4848     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4849     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4850
4851     # re-read the instance from the configuration after rename
4852     inst = self.cfg.GetInstanceInfo(self.op.new_name)
4853
4854     if inst.disk_template == constants.DT_FILE:
4855       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4856       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4857                                                      old_file_storage_dir,
4858                                                      new_file_storage_dir)
4859       result.Raise("Could not rename on node %s directory '%s' to '%s'"
4860                    " (but the instance has been renamed in Ganeti)" %
4861                    (inst.primary_node, old_file_storage_dir,
4862                     new_file_storage_dir))
4863
4864     _StartInstanceDisks(self, inst, None)
4865     try:
4866       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4867                                                  old_name, self.op.debug_level)
4868       msg = result.fail_msg
4869       if msg:
4870         msg = ("Could not run OS rename script for instance %s on node %s"
4871                " (but the instance has been renamed in Ganeti): %s" %
4872                (inst.name, inst.primary_node, msg))
4873         self.proc.LogWarning(msg)
4874     finally:
4875       _ShutdownInstanceDisks(self, inst)
4876
4877
4878 class LURemoveInstance(LogicalUnit):
4879   """Remove an instance.
4880
4881   """
4882   HPATH = "instance-remove"
4883   HTYPE = constants.HTYPE_INSTANCE
4884   _OP_PARAMS = [
4885     _PInstanceName,
4886     ("ignore_failures", False, _TBool),
4887     _PShutdownTimeout,
4888     ]
4889   REQ_BGL = False
4890
4891   def ExpandNames(self):
4892     self._ExpandAndLockInstance()
4893     self.needed_locks[locking.LEVEL_NODE] = []
4894     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4895
4896   def DeclareLocks(self, level):
4897     if level == locking.LEVEL_NODE:
4898       self._LockInstancesNodes()
4899
4900   def BuildHooksEnv(self):
4901     """Build hooks env.
4902
4903     This runs on master, primary and secondary nodes of the instance.
4904
4905     """
4906     env = _BuildInstanceHookEnvByObject(self, self.instance)
4907     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4908     nl = [self.cfg.GetMasterNode()]
4909     nl_post = list(self.instance.all_nodes) + nl
4910     return env, nl, nl_post
4911
4912   def CheckPrereq(self):
4913     """Check prerequisites.
4914
4915     This checks that the instance is in the cluster.
4916
4917     """
4918     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4919     assert self.instance is not None, \
4920       "Cannot retrieve locked instance %s" % self.op.instance_name
4921
4922   def Exec(self, feedback_fn):
4923     """Remove the instance.
4924
4925     """
4926     instance = self.instance
4927     logging.info("Shutting down instance %s on node %s",
4928                  instance.name, instance.primary_node)
4929
4930     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4931                                              self.op.shutdown_timeout)
4932     msg = result.fail_msg
4933     if msg:
4934       if self.op.ignore_failures:
4935         feedback_fn("Warning: can't shutdown instance: %s" % msg)
4936       else:
4937         raise errors.OpExecError("Could not shutdown instance %s on"
4938                                  " node %s: %s" %
4939                                  (instance.name, instance.primary_node, msg))
4940
4941     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4942
4943
4944 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4945   """Utility function to remove an instance.
4946
4947   """
4948   logging.info("Removing block devices for instance %s", instance.name)
4949
4950   if not _RemoveDisks(lu, instance):
4951     if not ignore_failures:
4952       raise errors.OpExecError("Can't remove instance's disks")
4953     feedback_fn("Warning: can't remove instance's disks")
4954
4955   logging.info("Removing instance %s out of cluster config", instance.name)
4956
4957   lu.cfg.RemoveInstance(instance.name)
4958
4959   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4960     "Instance lock removal conflict"
4961
4962   # Remove lock for the instance
4963   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4964
4965
4966 class LUQueryInstances(NoHooksLU):
4967   """Logical unit for querying instances.
4968
4969   """
4970   # pylint: disable-msg=W0142
4971   _OP_PARAMS = [
4972     ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
4973     ("names", _EmptyList, _TListOf(_TNonEmptyString)),
4974     ("use_locking", False, _TBool),
4975     ]
4976   REQ_BGL = False
4977   _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4978                     "serial_no", "ctime", "mtime", "uuid"]
4979   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4980                                     "admin_state",
4981                                     "disk_template", "ip", "mac", "bridge",
4982                                     "nic_mode", "nic_link",
4983                                     "sda_size", "sdb_size", "vcpus", "tags",
4984                                     "network_port", "beparams",
4985                                     r"(disk)\.(size)/([0-9]+)",
4986                                     r"(disk)\.(sizes)", "disk_usage",
4987                                     r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4988                                     r"(nic)\.(bridge)/([0-9]+)",
4989                                     r"(nic)\.(macs|ips|modes|links|bridges)",
4990                                     r"(disk|nic)\.(count)",
4991                                     "hvparams",
4992                                     ] + _SIMPLE_FIELDS +
4993                                   ["hv/%s" % name
4994                                    for name in constants.HVS_PARAMETERS
4995                                    if name not in constants.HVC_GLOBALS] +
4996                                   ["be/%s" % name
4997                                    for name in constants.BES_PARAMETERS])
4998   _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4999
5000
5001   def CheckArguments(self):
5002     _CheckOutputFields(static=self._FIELDS_STATIC,
5003                        dynamic=self._FIELDS_DYNAMIC,
5004                        selected=self.op.output_fields)
5005
5006   def ExpandNames(self):
5007     self.needed_locks = {}
5008     self.share_locks[locking.LEVEL_INSTANCE] = 1
5009     self.share_locks[locking.LEVEL_NODE] = 1
5010
5011     if self.op.names:
5012       self.wanted = _GetWantedInstances(self, self.op.names)
5013     else:
5014       self.wanted = locking.ALL_SET
5015
5016     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5017     self.do_locking = self.do_node_query and self.op.use_locking
5018     if self.do_locking:
5019       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5020       self.needed_locks[locking.LEVEL_NODE] = []
5021       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5022
5023   def DeclareLocks(self, level):
5024     if level == locking.LEVEL_NODE and self.do_locking:
5025       self._LockInstancesNodes()
5026
5027   def Exec(self, feedback_fn):
5028     """Computes the list of nodes and their attributes.
5029
5030     """
5031     # pylint: disable-msg=R0912
5032     # way too many branches here
5033     all_info = self.cfg.GetAllInstancesInfo()
5034     if self.wanted == locking.ALL_SET:
5035       # caller didn't specify instance names, so ordering is not important
5036       if self.do_locking:
5037         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5038       else:
5039         instance_names = all_info.keys()
5040       instance_names = utils.NiceSort(instance_names)
5041     else:
5042       # caller did specify names, so we must keep the ordering
5043       if self.do_locking:
5044         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5045       else:
5046         tgt_set = all_info.keys()
5047       missing = set(self.wanted).difference(tgt_set)
5048       if missing:
5049         raise errors.OpExecError("Some instances were removed before"
5050                                  " retrieving their data: %s" % missing)
5051       instance_names = self.wanted
5052
5053     instance_list = [all_info[iname] for iname in instance_names]
5054
5055     # begin data gathering
5056
5057     nodes = frozenset([inst.primary_node for inst in instance_list])
5058     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5059
5060     bad_nodes = []
5061     off_nodes = []
5062     if self.do_node_query:
5063       live_data = {}
5064       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5065       for name in nodes:
5066         result = node_data[name]
5067         if result.offline:
5068           # offline nodes will be in both lists
5069           off_nodes.append(name)
5070         if result.fail_msg:
5071           bad_nodes.append(name)
5072         else:
5073           if result.payload:
5074             live_data.update(result.payload)
5075           # else no instance is alive
5076     else:
5077       live_data = dict([(name, {}) for name in instance_names])
5078
5079     # end data gathering
5080
5081     HVPREFIX = "hv/"
5082     BEPREFIX = "be/"
5083     output = []
5084     cluster = self.cfg.GetClusterInfo()
5085     for instance in instance_list:
5086       iout = []
5087       i_hv = cluster.FillHV(instance, skip_globals=True)
5088       i_be = cluster.FillBE(instance)
5089       i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5090       for field in self.op.output_fields:
5091         st_match = self._FIELDS_STATIC.Matches(field)
5092         if field in self._SIMPLE_FIELDS:
5093           val = getattr(instance, field)
5094         elif field == "pnode":
5095           val = instance.primary_node
5096         elif field == "snodes":
5097           val = list(instance.secondary_nodes)
5098         elif field == "admin_state":
5099           val = instance.admin_up
5100         elif field == "oper_state":
5101           if instance.primary_node in bad_nodes:
5102             val = None
5103           else:
5104             val = bool(live_data.get(instance.name))
5105         elif field == "status":
5106           if instance.primary_node in off_nodes:
5107             val = "ERROR_nodeoffline"
5108           elif instance.primary_node in bad_nodes:
5109             val = "ERROR_nodedown"
5110           else:
5111             running = bool(live_data.get(instance.name))
5112             if running:
5113               if instance.admin_up:
5114                 val = "running"
5115               else:
5116                 val = "ERROR_up"
5117             else:
5118               if instance.admin_up:
5119                 val = "ERROR_down"
5120               else:
5121                 val = "ADMIN_down"
5122         elif field == "oper_ram":
5123           if instance.primary_node in bad_nodes:
5124             val = None
5125           elif instance.name in live_data:
5126             val = live_data[instance.name].get("memory", "?")
5127           else:
5128             val = "-"
5129         elif field == "vcpus":
5130           val = i_be[constants.BE_VCPUS]
5131         elif field == "disk_template":
5132           val = instance.disk_template
5133         elif field == "ip":
5134           if instance.nics:
5135             val = instance.nics[0].ip
5136           else:
5137             val = None
5138         elif field == "nic_mode":
5139           if instance.nics:
5140             val = i_nicp[0][constants.NIC_MODE]
5141           else:
5142             val = None
5143         elif field == "nic_link":
5144           if instance.nics:
5145             val = i_nicp[0][constants.NIC_LINK]
5146           else:
5147             val = None
5148         elif field == "bridge":
5149           if (instance.nics and
5150               i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5151             val = i_nicp[0][constants.NIC_LINK]
5152           else:
5153             val = None
5154         elif field == "mac":
5155           if instance.nics:
5156             val = instance.nics[0].mac
5157           else:
5158             val = None
5159         elif field == "sda_size" or field == "sdb_size":
5160           idx = ord(field[2]) - ord('a')
5161           try:
5162             val = instance.FindDisk(idx).size
5163           except errors.OpPrereqError:
5164             val = None
5165         elif field == "disk_usage": # total disk usage per node
5166           disk_sizes = [{'size': disk.size} for disk in instance.disks]
5167           val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5168         elif field == "tags":
5169           val = list(instance.GetTags())
5170         elif field == "hvparams":
5171           val = i_hv
5172         elif (field.startswith(HVPREFIX) and
5173               field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5174               field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5175           val = i_hv.get(field[len(HVPREFIX):], None)
5176         elif field == "beparams":
5177           val = i_be
5178         elif (field.startswith(BEPREFIX) and
5179               field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5180           val = i_be.get(field[len(BEPREFIX):], None)
5181         elif st_match and st_match.groups():
5182           # matches a variable list
5183           st_groups = st_match.groups()
5184           if st_groups and st_groups[0] == "disk":
5185             if st_groups[1] == "count":
5186               val = len(instance.disks)
5187             elif st_groups[1] == "sizes":
5188               val = [disk.size for disk in instance.disks]
5189             elif st_groups[1] == "size":
5190               try:
5191                 val = instance.FindDisk(st_groups[2]).size
5192               except errors.OpPrereqError:
5193                 val = None
5194             else:
5195               assert False, "Unhandled disk parameter"
5196           elif st_groups[0] == "nic":
5197             if st_groups[1] == "count":
5198               val = len(instance.nics)
5199             elif st_groups[1] == "macs":
5200               val = [nic.mac for nic in instance.nics]
5201             elif st_groups[1] == "ips":
5202               val = [nic.ip for nic in instance.nics]
5203             elif st_groups[1] == "modes":
5204               val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5205             elif st_groups[1] == "links":
5206               val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5207             elif st_groups[1] == "bridges":
5208               val = []
5209               for nicp in i_nicp:
5210                 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5211                   val.append(nicp[constants.NIC_LINK])
5212                 else:
5213                   val.append(None)
5214             else:
5215               # index-based item
5216               nic_idx = int(st_groups[2])
5217               if nic_idx >= len(instance.nics):
5218                 val = None
5219               else:
5220                 if st_groups[1] == "mac":
5221                   val = instance.nics[nic_idx].mac
5222                 elif st_groups[1] == "ip":
5223                   val = instance.nics[nic_idx].ip
5224                 elif st_groups[1] == "mode":
5225                   val = i_nicp[nic_idx][constants.NIC_MODE]
5226                 elif st_groups[1] == "link":
5227                   val = i_nicp[nic_idx][constants.NIC_LINK]
5228                 elif st_groups[1] == "bridge":
5229                   nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5230                   if nic_mode == constants.NIC_MODE_BRIDGED:
5231                     val = i_nicp[nic_idx][constants.NIC_LINK]
5232                   else:
5233                     val = None
5234                 else:
5235                   assert False, "Unhandled NIC parameter"
5236           else:
5237             assert False, ("Declared but unhandled variable parameter '%s'" %
5238                            field)
5239         else:
5240           assert False, "Declared but unhandled parameter '%s'" % field
5241         iout.append(val)
5242       output.append(iout)
5243
5244     return output
5245
5246
5247 class LUFailoverInstance(LogicalUnit):
5248   """Failover an instance.
5249
5250   """
5251   HPATH = "instance-failover"
5252   HTYPE = constants.HTYPE_INSTANCE
5253   _OP_PARAMS = [
5254     _PInstanceName,
5255     ("ignore_consistency", False, _TBool),
5256     _PShutdownTimeout,
5257     ]
5258   REQ_BGL = False
5259
5260   def ExpandNames(self):
5261     self._ExpandAndLockInstance()
5262     self.needed_locks[locking.LEVEL_NODE] = []
5263     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5264
5265   def DeclareLocks(self, level):
5266     if level == locking.LEVEL_NODE:
5267       self._LockInstancesNodes()
5268
5269   def BuildHooksEnv(self):
5270     """Build hooks env.
5271
5272     This runs on master, primary and secondary nodes of the instance.
5273
5274     """
5275     instance = self.instance
5276     source_node = instance.primary_node
5277     target_node = instance.secondary_nodes[0]
5278     env = {
5279       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5280       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5281       "OLD_PRIMARY": source_node,
5282       "OLD_SECONDARY": target_node,
5283       "NEW_PRIMARY": target_node,
5284       "NEW_SECONDARY": source_node,
5285       }
5286     env.update(_BuildInstanceHookEnvByObject(self, instance))
5287     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5288     nl_post = list(nl)
5289     nl_post.append(source_node)
5290     return env, nl, nl_post
5291
5292   def CheckPrereq(self):
5293     """Check prerequisites.
5294
5295     This checks that the instance is in the cluster.
5296
5297     """
5298     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5299     assert self.instance is not None, \
5300       "Cannot retrieve locked instance %s" % self.op.instance_name
5301
5302     bep = self.cfg.GetClusterInfo().FillBE(instance)
5303     if instance.disk_template not in constants.DTS_NET_MIRROR:
5304       raise errors.OpPrereqError("Instance's disk layout is not"
5305                                  " network mirrored, cannot failover.",
5306                                  errors.ECODE_STATE)
5307
5308     secondary_nodes = instance.secondary_nodes
5309     if not secondary_nodes:
5310       raise errors.ProgrammerError("no secondary node but using "
5311                                    "a mirrored disk template")
5312
5313     target_node = secondary_nodes[0]
5314     _CheckNodeOnline(self, target_node)
5315     _CheckNodeNotDrained(self, target_node)
5316     if instance.admin_up:
5317       # check memory requirements on the secondary node
5318       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5319                            instance.name, bep[constants.BE_MEMORY],
5320                            instance.hypervisor)
5321     else:
5322       self.LogInfo("Not checking memory on the secondary node as"
5323                    " instance will not be started")
5324
5325     # check bridge existance
5326     _CheckInstanceBridgesExist(self, instance, node=target_node)
5327
5328   def Exec(self, feedback_fn):
5329     """Failover an instance.
5330
5331     The failover is done by shutting it down on its present node and
5332     starting it on the secondary.
5333
5334     """
5335     instance = self.instance
5336
5337     source_node = instance.primary_node
5338     target_node = instance.secondary_nodes[0]
5339
5340     if instance.admin_up:
5341       feedback_fn("* checking disk consistency between source and target")
5342       for dev in instance.disks:
5343         # for drbd, these are drbd over lvm
5344         if not _CheckDiskConsistency(self, dev, target_node, False):
5345           if not self.op.ignore_consistency:
5346             raise errors.OpExecError("Disk %s is degraded on target node,"
5347                                      " aborting failover." % dev.iv_name)
5348     else:
5349       feedback_fn("* not checking disk consistency as instance is not running")
5350
5351     feedback_fn("* shutting down instance on source node")
5352     logging.info("Shutting down instance %s on node %s",
5353                  instance.name, source_node)
5354
5355     result = self.rpc.call_instance_shutdown(source_node, instance,
5356                                              self.op.shutdown_timeout)
5357     msg = result.fail_msg
5358     if msg:
5359       if self.op.ignore_consistency:
5360         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5361                              " Proceeding anyway. Please make sure node"
5362                              " %s is down. Error details: %s",
5363                              instance.name, source_node, source_node, msg)
5364       else:
5365         raise errors.OpExecError("Could not shutdown instance %s on"
5366                                  " node %s: %s" %
5367                                  (instance.name, source_node, msg))
5368
5369     feedback_fn("* deactivating the instance's disks on source node")
5370     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5371       raise errors.OpExecError("Can't shut down the instance's disks.")
5372
5373     instance.primary_node = target_node
5374     # distribute new instance config to the other nodes
5375     self.cfg.Update(instance, feedback_fn)
5376
5377     # Only start the instance if it's marked as up
5378     if instance.admin_up:
5379       feedback_fn("* activating the instance's disks on target node")
5380       logging.info("Starting instance %s on node %s",
5381                    instance.name, target_node)
5382
5383       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5384                                            ignore_secondaries=True)
5385       if not disks_ok:
5386         _ShutdownInstanceDisks(self, instance)
5387         raise errors.OpExecError("Can't activate the instance's disks")
5388
5389       feedback_fn("* starting the instance on the target node")
5390       result = self.rpc.call_instance_start(target_node, instance, None, None)
5391       msg = result.fail_msg
5392       if msg:
5393         _ShutdownInstanceDisks(self, instance)
5394         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5395                                  (instance.name, target_node, msg))
5396
5397
5398 class LUMigrateInstance(LogicalUnit):
5399   """Migrate an instance.
5400
5401   This is migration without shutting down, compared to the failover,
5402   which is done with shutdown.
5403
5404   """
5405   HPATH = "instance-migrate"
5406   HTYPE = constants.HTYPE_INSTANCE
5407   _OP_PARAMS = [
5408     _PInstanceName,
5409     ("live", True, _TBool),
5410     ("cleanup", False, _TBool),
5411     ]
5412
5413   REQ_BGL = False
5414
5415   def ExpandNames(self):
5416     self._ExpandAndLockInstance()
5417
5418     self.needed_locks[locking.LEVEL_NODE] = []
5419     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5420
5421     self._migrater = TLMigrateInstance(self, self.op.instance_name,
5422                                        self.op.live, self.op.cleanup)
5423     self.tasklets = [self._migrater]
5424
5425   def DeclareLocks(self, level):
5426     if level == locking.LEVEL_NODE:
5427       self._LockInstancesNodes()
5428
5429   def BuildHooksEnv(self):
5430     """Build hooks env.
5431
5432     This runs on master, primary and secondary nodes of the instance.
5433
5434     """
5435     instance = self._migrater.instance
5436     source_node = instance.primary_node
5437     target_node = instance.secondary_nodes[0]
5438     env = _BuildInstanceHookEnvByObject(self, instance)
5439     env["MIGRATE_LIVE"] = self.op.live
5440     env["MIGRATE_CLEANUP"] = self.op.cleanup
5441     env.update({
5442         "OLD_PRIMARY": source_node,
5443         "OLD_SECONDARY": target_node,
5444         "NEW_PRIMARY": target_node,
5445         "NEW_SECONDARY": source_node,
5446         })
5447     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5448     nl_post = list(nl)
5449     nl_post.append(source_node)
5450     return env, nl, nl_post
5451
5452
5453 class LUMoveInstance(LogicalUnit):
5454   """Move an instance by data-copying.
5455
5456   """
5457   HPATH = "instance-move"
5458   HTYPE = constants.HTYPE_INSTANCE
5459   _OP_PARAMS = [
5460     _PInstanceName,
5461     ("target_node", _NoDefault, _TNonEmptyString),
5462     _PShutdownTimeout,
5463     ]
5464   REQ_BGL = False
5465
5466   def ExpandNames(self):
5467     self._ExpandAndLockInstance()
5468     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5469     self.op.target_node = target_node
5470     self.needed_locks[locking.LEVEL_NODE] = [target_node]
5471     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5472
5473   def DeclareLocks(self, level):
5474     if level == locking.LEVEL_NODE:
5475       self._LockInstancesNodes(primary_only=True)
5476
5477   def BuildHooksEnv(self):
5478     """Build hooks env.
5479
5480     This runs on master, primary and secondary nodes of the instance.
5481
5482     """
5483     env = {
5484       "TARGET_NODE": self.op.target_node,
5485       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5486       }
5487     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5488     nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5489                                        self.op.target_node]
5490     return env, nl, nl
5491
5492   def CheckPrereq(self):
5493     """Check prerequisites.
5494
5495     This checks that the instance is in the cluster.
5496
5497     """
5498     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5499     assert self.instance is not None, \
5500       "Cannot retrieve locked instance %s" % self.op.instance_name
5501
5502     node = self.cfg.GetNodeInfo(self.op.target_node)
5503     assert node is not None, \
5504       "Cannot retrieve locked node %s" % self.op.target_node
5505
5506     self.target_node = target_node = node.name
5507
5508     if target_node == instance.primary_node:
5509       raise errors.OpPrereqError("Instance %s is already on the node %s" %
5510                                  (instance.name, target_node),
5511                                  errors.ECODE_STATE)
5512
5513     bep = self.cfg.GetClusterInfo().FillBE(instance)
5514
5515     for idx, dsk in enumerate(instance.disks):
5516       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5517         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5518                                    " cannot copy" % idx, errors.ECODE_STATE)
5519
5520     _CheckNodeOnline(self, target_node)
5521     _CheckNodeNotDrained(self, target_node)
5522
5523     if instance.admin_up:
5524       # check memory requirements on the secondary node
5525       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5526                            instance.name, bep[constants.BE_MEMORY],
5527                            instance.hypervisor)
5528     else:
5529       self.LogInfo("Not checking memory on the secondary node as"
5530                    " instance will not be started")
5531
5532     # check bridge existance
5533     _CheckInstanceBridgesExist(self, instance, node=target_node)
5534
5535   def Exec(self, feedback_fn):
5536     """Move an instance.
5537
5538     The move is done by shutting it down on its present node, copying
5539     the data over (slow) and starting it on the new node.
5540
5541     """
5542     instance = self.instance
5543
5544     source_node = instance.primary_node
5545     target_node = self.target_node
5546
5547     self.LogInfo("Shutting down instance %s on source node %s",
5548                  instance.name, source_node)
5549
5550     result = self.rpc.call_instance_shutdown(source_node, instance,
5551                                              self.op.shutdown_timeout)
5552     msg = result.fail_msg
5553     if msg:
5554       if self.op.ignore_consistency:
5555         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5556                              " Proceeding anyway. Please make sure node"
5557                              " %s is down. Error details: %s",
5558                              instance.name, source_node, source_node, msg)
5559       else:
5560         raise errors.OpExecError("Could not shutdown instance %s on"
5561                                  " node %s: %s" %
5562                                  (instance.name, source_node, msg))
5563
5564     # create the target disks
5565     try:
5566       _CreateDisks(self, instance, target_node=target_node)
5567     except errors.OpExecError:
5568       self.LogWarning("Device creation failed, reverting...")
5569       try:
5570         _RemoveDisks(self, instance, target_node=target_node)
5571       finally:
5572         self.cfg.ReleaseDRBDMinors(instance.name)
5573         raise
5574
5575     cluster_name = self.cfg.GetClusterInfo().cluster_name
5576
5577     errs = []
5578     # activate, get path, copy the data over
5579     for idx, disk in enumerate(instance.disks):
5580       self.LogInfo("Copying data for disk %d", idx)
5581       result = self.rpc.call_blockdev_assemble(target_node, disk,
5582                                                instance.name, True)
5583       if result.fail_msg:
5584         self.LogWarning("Can't assemble newly created disk %d: %s",
5585                         idx, result.fail_msg)
5586         errs.append(result.fail_msg)
5587         break
5588       dev_path = result.payload
5589       result = self.rpc.call_blockdev_export(source_node, disk,
5590                                              target_node, dev_path,
5591                                              cluster_name)
5592       if result.fail_msg:
5593         self.LogWarning("Can't copy data over for disk %d: %s",
5594                         idx, result.fail_msg)
5595         errs.append(result.fail_msg)
5596         break
5597
5598     if errs:
5599       self.LogWarning("Some disks failed to copy, aborting")
5600       try:
5601         _RemoveDisks(self, instance, target_node=target_node)
5602       finally:
5603         self.cfg.ReleaseDRBDMinors(instance.name)
5604         raise errors.OpExecError("Errors during disk copy: %s" %
5605                                  (",".join(errs),))
5606
5607     instance.primary_node = target_node
5608     self.cfg.Update(instance, feedback_fn)
5609
5610     self.LogInfo("Removing the disks on the original node")
5611     _RemoveDisks(self, instance, target_node=source_node)
5612
5613     # Only start the instance if it's marked as up
5614     if instance.admin_up:
5615       self.LogInfo("Starting instance %s on node %s",
5616                    instance.name, target_node)
5617
5618       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5619                                            ignore_secondaries=True)
5620       if not disks_ok:
5621         _ShutdownInstanceDisks(self, instance)
5622         raise errors.OpExecError("Can't activate the instance's disks")
5623
5624       result = self.rpc.call_instance_start(target_node, instance, None, None)
5625       msg = result.fail_msg
5626       if msg:
5627         _ShutdownInstanceDisks(self, instance)
5628         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5629                                  (instance.name, target_node, msg))
5630
5631
5632 class LUMigrateNode(LogicalUnit):
5633   """Migrate all instances from a node.
5634
5635   """
5636   HPATH = "node-migrate"
5637   HTYPE = constants.HTYPE_NODE
5638   _OP_PARAMS = [
5639     _PNodeName,
5640     ("live", False, _TBool),
5641     ]
5642   REQ_BGL = False
5643
5644   def ExpandNames(self):
5645     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5646
5647     self.needed_locks = {
5648       locking.LEVEL_NODE: [self.op.node_name],
5649       }
5650
5651     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5652
5653     # Create tasklets for migrating instances for all instances on this node
5654     names = []
5655     tasklets = []
5656
5657     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5658       logging.debug("Migrating instance %s", inst.name)
5659       names.append(inst.name)
5660
5661       tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5662
5663     self.tasklets = tasklets
5664
5665     # Declare instance locks
5666     self.needed_locks[locking.LEVEL_INSTANCE] = names
5667
5668   def DeclareLocks(self, level):
5669     if level == locking.LEVEL_NODE:
5670       self._LockInstancesNodes()
5671
5672   def BuildHooksEnv(self):
5673     """Build hooks env.
5674
5675     This runs on the master, the primary and all the secondaries.
5676
5677     """
5678     env = {
5679       "NODE_NAME": self.op.node_name,
5680       }
5681
5682     nl = [self.cfg.GetMasterNode()]
5683
5684     return (env, nl, nl)
5685
5686
5687 class TLMigrateInstance(Tasklet):
5688   def __init__(self, lu, instance_name, live, cleanup):
5689     """Initializes this class.
5690
5691     """
5692     Tasklet.__init__(self, lu)
5693
5694     # Parameters
5695     self.instance_name = instance_name
5696     self.live = live
5697     self.cleanup = cleanup
5698
5699   def CheckPrereq(self):
5700     """Check prerequisites.
5701
5702     This checks that the instance is in the cluster.
5703
5704     """
5705     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5706     instance = self.cfg.GetInstanceInfo(instance_name)
5707     assert instance is not None
5708
5709     if instance.disk_template != constants.DT_DRBD8:
5710       raise errors.OpPrereqError("Instance's disk layout is not"
5711                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
5712
5713     secondary_nodes = instance.secondary_nodes
5714     if not secondary_nodes:
5715       raise errors.ConfigurationError("No secondary node but using"
5716                                       " drbd8 disk template")
5717
5718     i_be = self.cfg.GetClusterInfo().FillBE(instance)
5719
5720     target_node = secondary_nodes[0]
5721     # check memory requirements on the secondary node
5722     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5723                          instance.name, i_be[constants.BE_MEMORY],
5724                          instance.hypervisor)
5725
5726     # check bridge existance
5727     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5728
5729     if not self.cleanup:
5730       _CheckNodeNotDrained(self.lu, target_node)
5731       result = self.rpc.call_instance_migratable(instance.primary_node,
5732                                                  instance)
5733       result.Raise("Can't migrate, please use failover",
5734                    prereq=True, ecode=errors.ECODE_STATE)
5735
5736     self.instance = instance
5737
5738   def _WaitUntilSync(self):
5739     """Poll with custom rpc for disk sync.
5740
5741     This uses our own step-based rpc call.
5742
5743     """
5744     self.feedback_fn("* wait until resync is done")
5745     all_done = False
5746     while not all_done:
5747       all_done = True
5748       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5749                                             self.nodes_ip,
5750                                             self.instance.disks)
5751       min_percent = 100
5752       for node, nres in result.items():
5753         nres.Raise("Cannot resync disks on node %s" % node)
5754         node_done, node_percent = nres.payload
5755         all_done = all_done and node_done
5756         if node_percent is not None:
5757           min_percent = min(min_percent, node_percent)
5758       if not all_done:
5759         if min_percent < 100:
5760           self.feedback_fn("   - progress: %.1f%%" % min_percent)
5761         time.sleep(2)
5762
5763   def _EnsureSecondary(self, node):
5764     """Demote a node to secondary.
5765
5766     """
5767     self.feedback_fn("* switching node %s to secondary mode" % node)
5768
5769     for dev in self.instance.disks:
5770       self.cfg.SetDiskID(dev, node)
5771
5772     result = self.rpc.call_blockdev_close(node, self.instance.name,
5773                                           self.instance.disks)
5774     result.Raise("Cannot change disk to secondary on node %s" % node)
5775
5776   def _GoStandalone(self):
5777     """Disconnect from the network.
5778
5779     """
5780     self.feedback_fn("* changing into standalone mode")
5781     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5782                                                self.instance.disks)
5783     for node, nres in result.items():
5784       nres.Raise("Cannot disconnect disks node %s" % node)
5785
5786   def _GoReconnect(self, multimaster):
5787     """Reconnect to the network.
5788
5789     """
5790     if multimaster:
5791       msg = "dual-master"
5792     else:
5793       msg = "single-master"
5794     self.feedback_fn("* changing disks into %s mode" % msg)
5795     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5796                                            self.instance.disks,
5797                                            self.instance.name, multimaster)
5798     for node, nres in result.items():
5799       nres.Raise("Cannot change disks config on node %s" % node)
5800
5801   def _ExecCleanup(self):
5802     """Try to cleanup after a failed migration.
5803
5804     The cleanup is done by:
5805       - check that the instance is running only on one node
5806         (and update the config if needed)
5807       - change disks on its secondary node to secondary
5808       - wait until disks are fully synchronized
5809       - disconnect from the network
5810       - change disks into single-master mode
5811       - wait again until disks are fully synchronized
5812
5813     """
5814     instance = self.instance
5815     target_node = self.target_node
5816     source_node = self.source_node
5817
5818     # check running on only one node
5819     self.feedback_fn("* checking where the instance actually runs"
5820                      " (if this hangs, the hypervisor might be in"
5821                      " a bad state)")
5822     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5823     for node, result in ins_l.items():
5824       result.Raise("Can't contact node %s" % node)
5825
5826     runningon_source = instance.name in ins_l[source_node].payload
5827     runningon_target = instance.name in ins_l[target_node].payload
5828
5829     if runningon_source and runningon_target:
5830       raise errors.OpExecError("Instance seems to be running on two nodes,"
5831                                " or the hypervisor is confused. You will have"
5832                                " to ensure manually that it runs only on one"
5833                                " and restart this operation.")
5834
5835     if not (runningon_source or runningon_target):
5836       raise errors.OpExecError("Instance does not seem to be running at all."
5837                                " In this case, it's safer to repair by"
5838                                " running 'gnt-instance stop' to ensure disk"
5839                                " shutdown, and then restarting it.")
5840
5841     if runningon_target:
5842       # the migration has actually succeeded, we need to update the config
5843       self.feedback_fn("* instance running on secondary node (%s),"
5844                        " updating config" % target_node)
5845       instance.primary_node = target_node
5846       self.cfg.Update(instance, self.feedback_fn)
5847       demoted_node = source_node
5848     else:
5849       self.feedback_fn("* instance confirmed to be running on its"
5850                        " primary node (%s)" % source_node)
5851       demoted_node = target_node
5852
5853     self._EnsureSecondary(demoted_node)
5854     try:
5855       self._WaitUntilSync()
5856     except errors.OpExecError:
5857       # we ignore here errors, since if the device is standalone, it
5858       # won't be able to sync
5859       pass
5860     self._GoStandalone()
5861     self._GoReconnect(False)
5862     self._WaitUntilSync()
5863
5864     self.feedback_fn("* done")
5865
5866   def _RevertDiskStatus(self):
5867     """Try to revert the disk status after a failed migration.
5868
5869     """
5870     target_node = self.target_node
5871     try:
5872       self._EnsureSecondary(target_node)
5873       self._GoStandalone()
5874       self._GoReconnect(False)
5875       self._WaitUntilSync()
5876     except errors.OpExecError, err:
5877       self.lu.LogWarning("Migration failed and I can't reconnect the"
5878                          " drives: error '%s'\n"
5879                          "Please look and recover the instance status" %
5880                          str(err))
5881
5882   def _AbortMigration(self):
5883     """Call the hypervisor code to abort a started migration.
5884
5885     """
5886     instance = self.instance
5887     target_node = self.target_node
5888     migration_info = self.migration_info
5889
5890     abort_result = self.rpc.call_finalize_migration(target_node,
5891                                                     instance,
5892                                                     migration_info,
5893                                                     False)
5894     abort_msg = abort_result.fail_msg
5895     if abort_msg:
5896       logging.error("Aborting migration failed on target node %s: %s",
5897                     target_node, abort_msg)
5898       # Don't raise an exception here, as we stil have to try to revert the
5899       # disk status, even if this step failed.
5900
5901   def _ExecMigration(self):
5902     """Migrate an instance.
5903
5904     The migrate is done by:
5905       - change the disks into dual-master mode
5906       - wait until disks are fully synchronized again
5907       - migrate the instance
5908       - change disks on the new secondary node (the old primary) to secondary
5909       - wait until disks are fully synchronized
5910       - change disks into single-master mode
5911
5912     """
5913     instance = self.instance
5914     target_node = self.target_node
5915     source_node = self.source_node
5916
5917     self.feedback_fn("* checking disk consistency between source and target")
5918     for dev in instance.disks:
5919       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5920         raise errors.OpExecError("Disk %s is degraded or not fully"
5921                                  " synchronized on target node,"
5922                                  " aborting migrate." % dev.iv_name)
5923
5924     # First get the migration information from the remote node
5925     result = self.rpc.call_migration_info(source_node, instance)
5926     msg = result.fail_msg
5927     if msg:
5928       log_err = ("Failed fetching source migration information from %s: %s" %
5929                  (source_node, msg))
5930       logging.error(log_err)
5931       raise errors.OpExecError(log_err)
5932
5933     self.migration_info = migration_info = result.payload
5934
5935     # Then switch the disks to master/master mode
5936     self._EnsureSecondary(target_node)
5937     self._GoStandalone()
5938     self._GoReconnect(True)
5939     self._WaitUntilSync()
5940
5941     self.feedback_fn("* preparing %s to accept the instance" % target_node)
5942     result = self.rpc.call_accept_instance(target_node,
5943                                            instance,
5944                                            migration_info,
5945                                            self.nodes_ip[target_node])
5946
5947     msg = result.fail_msg
5948     if msg:
5949       logging.error("Instance pre-migration failed, trying to revert"
5950                     " disk status: %s", msg)
5951       self.feedback_fn("Pre-migration failed, aborting")
5952       self._AbortMigration()
5953       self._RevertDiskStatus()
5954       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5955                                (instance.name, msg))
5956
5957     self.feedback_fn("* migrating instance to %s" % target_node)
5958     time.sleep(10)
5959     result = self.rpc.call_instance_migrate(source_node, instance,
5960                                             self.nodes_ip[target_node],
5961                                             self.live)
5962     msg = result.fail_msg
5963     if msg:
5964       logging.error("Instance migration failed, trying to revert"
5965                     " disk status: %s", msg)
5966       self.feedback_fn("Migration failed, aborting")
5967       self._AbortMigration()
5968       self._RevertDiskStatus()
5969       raise errors.OpExecError("Could not migrate instance %s: %s" %
5970                                (instance.name, msg))
5971     time.sleep(10)
5972
5973     instance.primary_node = target_node
5974     # distribute new instance config to the other nodes
5975     self.cfg.Update(instance, self.feedback_fn)
5976
5977     result = self.rpc.call_finalize_migration(target_node,
5978                                               instance,
5979                                               migration_info,
5980                                               True)
5981     msg = result.fail_msg
5982     if msg:
5983       logging.error("Instance migration succeeded, but finalization failed:"
5984                     " %s", msg)
5985       raise errors.OpExecError("Could not finalize instance migration: %s" %
5986                                msg)
5987
5988     self._EnsureSecondary(source_node)
5989     self._WaitUntilSync()
5990     self._GoStandalone()
5991     self._GoReconnect(False)
5992     self._WaitUntilSync()
5993
5994     self.feedback_fn("* done")
5995
5996   def Exec(self, feedback_fn):
5997     """Perform the migration.
5998
5999     """
6000     feedback_fn("Migrating instance %s" % self.instance.name)
6001
6002     self.feedback_fn = feedback_fn
6003
6004     self.source_node = self.instance.primary_node
6005     self.target_node = self.instance.secondary_nodes[0]
6006     self.all_nodes = [self.source_node, self.target_node]
6007     self.nodes_ip = {
6008       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6009       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6010       }
6011
6012     if self.cleanup:
6013       return self._ExecCleanup()
6014     else:
6015       return self._ExecMigration()
6016
6017
6018 def _CreateBlockDev(lu, node, instance, device, force_create,
6019                     info, force_open):
6020   """Create a tree of block devices on a given node.
6021
6022   If this device type has to be created on secondaries, create it and
6023   all its children.
6024
6025   If not, just recurse to children keeping the same 'force' value.
6026
6027   @param lu: the lu on whose behalf we execute
6028   @param node: the node on which to create the device
6029   @type instance: L{objects.Instance}
6030   @param instance: the instance which owns the device
6031   @type device: L{objects.Disk}
6032   @param device: the device to create
6033   @type force_create: boolean
6034   @param force_create: whether to force creation of this device; this
6035       will be change to True whenever we find a device which has
6036       CreateOnSecondary() attribute
6037   @param info: the extra 'metadata' we should attach to the device
6038       (this will be represented as a LVM tag)
6039   @type force_open: boolean
6040   @param force_open: this parameter will be passes to the
6041       L{backend.BlockdevCreate} function where it specifies
6042       whether we run on primary or not, and it affects both
6043       the child assembly and the device own Open() execution
6044
6045   """
6046   if device.CreateOnSecondary():
6047     force_create = True
6048
6049   if device.children:
6050     for child in device.children:
6051       _CreateBlockDev(lu, node, instance, child, force_create,
6052                       info, force_open)
6053
6054   if not force_create:
6055     return
6056
6057   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6058
6059
6060 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6061   """Create a single block device on a given node.
6062
6063   This will not recurse over children of the device, so they must be
6064   created in advance.
6065
6066   @param lu: the lu on whose behalf we execute
6067   @param node: the node on which to create the device
6068   @type instance: L{objects.Instance}
6069   @param instance: the instance which owns the device
6070   @type device: L{objects.Disk}
6071   @param device: the device to create
6072   @param info: the extra 'metadata' we should attach to the device
6073       (this will be represented as a LVM tag)
6074   @type force_open: boolean
6075   @param force_open: this parameter will be passes to the
6076       L{backend.BlockdevCreate} function where it specifies
6077       whether we run on primary or not, and it affects both
6078       the child assembly and the device own Open() execution
6079
6080   """
6081   lu.cfg.SetDiskID(device, node)
6082   result = lu.rpc.call_blockdev_create(node, device, device.size,
6083                                        instance.name, force_open, info)
6084   result.Raise("Can't create block device %s on"
6085                " node %s for instance %s" % (device, node, instance.name))
6086   if device.physical_id is None:
6087     device.physical_id = result.payload
6088
6089
6090 def _GenerateUniqueNames(lu, exts):
6091   """Generate a suitable LV name.
6092
6093   This will generate a logical volume name for the given instance.
6094
6095   """
6096   results = []
6097   for val in exts:
6098     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6099     results.append("%s%s" % (new_id, val))
6100   return results
6101
6102
6103 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6104                          p_minor, s_minor):
6105   """Generate a drbd8 device complete with its children.
6106
6107   """
6108   port = lu.cfg.AllocatePort()
6109   vgname = lu.cfg.GetVGName()
6110   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6111   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6112                           logical_id=(vgname, names[0]))
6113   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6114                           logical_id=(vgname, names[1]))
6115   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6116                           logical_id=(primary, secondary, port,
6117                                       p_minor, s_minor,
6118                                       shared_secret),
6119                           children=[dev_data, dev_meta],
6120                           iv_name=iv_name)
6121   return drbd_dev
6122
6123
6124 def _GenerateDiskTemplate(lu, template_name,
6125                           instance_name, primary_node,
6126                           secondary_nodes, disk_info,
6127                           file_storage_dir, file_driver,
6128                           base_index):
6129   """Generate the entire disk layout for a given template type.
6130
6131   """
6132   #TODO: compute space requirements
6133
6134   vgname = lu.cfg.GetVGName()
6135   disk_count = len(disk_info)
6136   disks = []
6137   if template_name == constants.DT_DISKLESS:
6138     pass
6139   elif template_name == constants.DT_PLAIN:
6140     if len(secondary_nodes) != 0:
6141       raise errors.ProgrammerError("Wrong template configuration")
6142
6143     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6144                                       for i in range(disk_count)])
6145     for idx, disk in enumerate(disk_info):
6146       disk_index = idx + base_index
6147       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6148                               logical_id=(vgname, names[idx]),
6149                               iv_name="disk/%d" % disk_index,
6150                               mode=disk["mode"])
6151       disks.append(disk_dev)
6152   elif template_name == constants.DT_DRBD8:
6153     if len(secondary_nodes) != 1:
6154       raise errors.ProgrammerError("Wrong template configuration")
6155     remote_node = secondary_nodes[0]
6156     minors = lu.cfg.AllocateDRBDMinor(
6157       [primary_node, remote_node] * len(disk_info), instance_name)
6158
6159     names = []
6160     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6161                                                for i in range(disk_count)]):
6162       names.append(lv_prefix + "_data")
6163       names.append(lv_prefix + "_meta")
6164     for idx, disk in enumerate(disk_info):
6165       disk_index = idx + base_index
6166       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6167                                       disk["size"], names[idx*2:idx*2+2],
6168                                       "disk/%d" % disk_index,
6169                                       minors[idx*2], minors[idx*2+1])
6170       disk_dev.mode = disk["mode"]
6171       disks.append(disk_dev)
6172   elif template_name == constants.DT_FILE:
6173     if len(secondary_nodes) != 0:
6174       raise errors.ProgrammerError("Wrong template configuration")
6175
6176     _RequireFileStorage()
6177
6178     for idx, disk in enumerate(disk_info):
6179       disk_index = idx + base_index
6180       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6181                               iv_name="disk/%d" % disk_index,
6182                               logical_id=(file_driver,
6183                                           "%s/disk%d" % (file_storage_dir,
6184                                                          disk_index)),
6185                               mode=disk["mode"])
6186       disks.append(disk_dev)
6187   else:
6188     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6189   return disks
6190
6191
6192 def _GetInstanceInfoText(instance):
6193   """Compute that text that should be added to the disk's metadata.
6194
6195   """
6196   return "originstname+%s" % instance.name
6197
6198
6199 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6200   """Create all disks for an instance.
6201
6202   This abstracts away some work from AddInstance.
6203
6204   @type lu: L{LogicalUnit}
6205   @param lu: the logical unit on whose behalf we execute
6206   @type instance: L{objects.Instance}
6207   @param instance: the instance whose disks we should create
6208   @type to_skip: list
6209   @param to_skip: list of indices to skip
6210   @type target_node: string
6211   @param target_node: if passed, overrides the target node for creation
6212   @rtype: boolean
6213   @return: the success of the creation
6214
6215   """
6216   info = _GetInstanceInfoText(instance)
6217   if target_node is None:
6218     pnode = instance.primary_node
6219     all_nodes = instance.all_nodes
6220   else:
6221     pnode = target_node
6222     all_nodes = [pnode]
6223
6224   if instance.disk_template == constants.DT_FILE:
6225     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6226     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6227
6228     result.Raise("Failed to create directory '%s' on"
6229                  " node %s" % (file_storage_dir, pnode))
6230
6231   # Note: this needs to be kept in sync with adding of disks in
6232   # LUSetInstanceParams
6233   for idx, device in enumerate(instance.disks):
6234     if to_skip and idx in to_skip:
6235       continue
6236     logging.info("Creating volume %s for instance %s",
6237                  device.iv_name, instance.name)
6238     #HARDCODE
6239     for node in all_nodes:
6240       f_create = node == pnode
6241       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6242
6243
6244 def _RemoveDisks(lu, instance, target_node=None):
6245   """Remove all disks for an instance.
6246
6247   This abstracts away some work from `AddInstance()` and
6248   `RemoveInstance()`. Note that in case some of the devices couldn't
6249   be removed, the removal will continue with the other ones (compare
6250   with `_CreateDisks()`).
6251
6252   @type lu: L{LogicalUnit}
6253   @param lu: the logical unit on whose behalf we execute
6254   @type instance: L{objects.Instance}
6255   @param instance: the instance whose disks we should remove
6256   @type target_node: string
6257   @param target_node: used to override the node on which to remove the disks
6258   @rtype: boolean
6259   @return: the success of the removal
6260
6261   """
6262   logging.info("Removing block devices for instance %s", instance.name)
6263
6264   all_result = True
6265   for device in instance.disks:
6266     if target_node:
6267       edata = [(target_node, device)]
6268     else:
6269       edata = device.ComputeNodeTree(instance.primary_node)
6270     for node, disk in edata:
6271       lu.cfg.SetDiskID(disk, node)
6272       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6273       if msg:
6274         lu.LogWarning("Could not remove block device %s on node %s,"
6275                       " continuing anyway: %s", device.iv_name, node, msg)
6276         all_result = False
6277
6278   if instance.disk_template == constants.DT_FILE:
6279     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6280     if target_node:
6281       tgt = target_node
6282     else:
6283       tgt = instance.primary_node
6284     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6285     if result.fail_msg:
6286       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6287                     file_storage_dir, instance.primary_node, result.fail_msg)
6288       all_result = False
6289
6290   return all_result
6291
6292
6293 def _ComputeDiskSize(disk_template, disks):
6294   """Compute disk size requirements in the volume group
6295
6296   """
6297   # Required free disk space as a function of disk and swap space
6298   req_size_dict = {
6299     constants.DT_DISKLESS: None,
6300     constants.DT_PLAIN: sum(d["size"] for d in disks),
6301     # 128 MB are added for drbd metadata for each disk
6302     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6303     constants.DT_FILE: None,
6304   }
6305
6306   if disk_template not in req_size_dict:
6307     raise errors.ProgrammerError("Disk template '%s' size requirement"
6308                                  " is unknown" %  disk_template)
6309
6310   return req_size_dict[disk_template]
6311
6312
6313 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6314   """Hypervisor parameter validation.
6315
6316   This function abstract the hypervisor parameter validation to be
6317   used in both instance create and instance modify.
6318
6319   @type lu: L{LogicalUnit}
6320   @param lu: the logical unit for which we check
6321   @type nodenames: list
6322   @param nodenames: the list of nodes on which we should check
6323   @type hvname: string
6324   @param hvname: the name of the hypervisor we should use
6325   @type hvparams: dict
6326   @param hvparams: the parameters which we need to check
6327   @raise errors.OpPrereqError: if the parameters are not valid
6328
6329   """
6330   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6331                                                   hvname,
6332                                                   hvparams)
6333   for node in nodenames:
6334     info = hvinfo[node]
6335     if info.offline:
6336       continue
6337     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6338
6339
6340 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6341   """OS parameters validation.
6342
6343   @type lu: L{LogicalUnit}
6344   @param lu: the logical unit for which we check
6345   @type required: boolean
6346   @param required: whether the validation should fail if the OS is not
6347       found
6348   @type nodenames: list
6349   @param nodenames: the list of nodes on which we should check
6350   @type osname: string
6351   @param osname: the name of the hypervisor we should use
6352   @type osparams: dict
6353   @param osparams: the parameters which we need to check
6354   @raise errors.OpPrereqError: if the parameters are not valid
6355
6356   """
6357   result = lu.rpc.call_os_validate(required, nodenames, osname,
6358                                    [constants.OS_VALIDATE_PARAMETERS],
6359                                    osparams)
6360   for node, nres in result.items():
6361     # we don't check for offline cases since this should be run only
6362     # against the master node and/or an instance's nodes
6363     nres.Raise("OS Parameters validation failed on node %s" % node)
6364     if not nres.payload:
6365       lu.LogInfo("OS %s not found on node %s, validation skipped",
6366                  osname, node)
6367
6368
6369 class LUCreateInstance(LogicalUnit):
6370   """Create an instance.
6371
6372   """
6373   HPATH = "instance-add"
6374   HTYPE = constants.HTYPE_INSTANCE
6375   _OP_PARAMS = [
6376     _PInstanceName,
6377     ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6378     ("start", True, _TBool),
6379     ("wait_for_sync", True, _TBool),
6380     ("ip_check", True, _TBool),
6381     ("name_check", True, _TBool),
6382     ("disks", _NoDefault, _TListOf(_TDict)),
6383     ("nics", _NoDefault, _TListOf(_TDict)),
6384     ("hvparams", _NoDefault, _TDict),
6385     ("beparams", _NoDefault, _TDict),
6386     ("osparams", _NoDefault, _TDict),
6387     ("no_install", None, _TMaybeBool),
6388     ("os_type", None, _TMaybeString),
6389     ("force_variant", False, _TBool),
6390     ("source_handshake", None, _TOr(_TList, _TNone)),
6391     ("source_x509_ca", None, _TOr(_TList, _TNone)),
6392     ("source_instance_name", None, _TMaybeString),
6393     ("src_node", None, _TMaybeString),
6394     ("src_path", None, _TMaybeString),
6395     ("pnode", None, _TMaybeString),
6396     ("snode", None, _TMaybeString),
6397     ("iallocator", None, _TMaybeString),
6398     ("hypervisor", None, _TMaybeString),
6399     ("disk_template", _NoDefault, _CheckDiskTemplate),
6400     ("identify_defaults", False, _TBool),
6401     ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6402     ("file_storage_dir", None, _TMaybeString),
6403     ("dry_run", False, _TBool),
6404     ]
6405   REQ_BGL = False
6406
6407   def CheckArguments(self):
6408     """Check arguments.
6409
6410     """
6411     # do not require name_check to ease forward/backward compatibility
6412     # for tools
6413     if self.op.no_install and self.op.start:
6414       self.LogInfo("No-installation mode selected, disabling startup")
6415       self.op.start = False
6416     # validate/normalize the instance name
6417     self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6418     if self.op.ip_check and not self.op.name_check:
6419       # TODO: make the ip check more flexible and not depend on the name check
6420       raise errors.OpPrereqError("Cannot do ip checks without a name check",
6421                                  errors.ECODE_INVAL)
6422
6423     # check nics' parameter names
6424     for nic in self.op.nics:
6425       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6426
6427     # check disks. parameter names and consistent adopt/no-adopt strategy
6428     has_adopt = has_no_adopt = False
6429     for disk in self.op.disks:
6430       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6431       if "adopt" in disk:
6432         has_adopt = True
6433       else:
6434         has_no_adopt = True
6435     if has_adopt and has_no_adopt:
6436       raise errors.OpPrereqError("Either all disks are adopted or none is",
6437                                  errors.ECODE_INVAL)
6438     if has_adopt:
6439       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6440         raise errors.OpPrereqError("Disk adoption is not supported for the"
6441                                    " '%s' disk template" %
6442                                    self.op.disk_template,
6443                                    errors.ECODE_INVAL)
6444       if self.op.iallocator is not None:
6445         raise errors.OpPrereqError("Disk adoption not allowed with an"
6446                                    " iallocator script", errors.ECODE_INVAL)
6447       if self.op.mode == constants.INSTANCE_IMPORT:
6448         raise errors.OpPrereqError("Disk adoption not allowed for"
6449                                    " instance import", errors.ECODE_INVAL)
6450
6451     self.adopt_disks = has_adopt
6452
6453     # instance name verification
6454     if self.op.name_check:
6455       self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6456       self.op.instance_name = self.hostname1.name
6457       # used in CheckPrereq for ip ping check
6458       self.check_ip = self.hostname1.ip
6459     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6460       raise errors.OpPrereqError("Remote imports require names to be checked" %
6461                                  errors.ECODE_INVAL)
6462     else:
6463       self.check_ip = None
6464
6465     # file storage checks
6466     if (self.op.file_driver and
6467         not self.op.file_driver in constants.FILE_DRIVER):
6468       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6469                                  self.op.file_driver, errors.ECODE_INVAL)
6470
6471     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6472       raise errors.OpPrereqError("File storage directory path not absolute",
6473                                  errors.ECODE_INVAL)
6474
6475     ### Node/iallocator related checks
6476     if [self.op.iallocator, self.op.pnode].count(None) != 1:
6477       raise errors.OpPrereqError("One and only one of iallocator and primary"
6478                                  " node must be given",
6479                                  errors.ECODE_INVAL)
6480
6481     self._cds = _GetClusterDomainSecret()
6482
6483     if self.op.mode == constants.INSTANCE_IMPORT:
6484       # On import force_variant must be True, because if we forced it at
6485       # initial install, our only chance when importing it back is that it
6486       # works again!
6487       self.op.force_variant = True
6488
6489       if self.op.no_install:
6490         self.LogInfo("No-installation mode has no effect during import")
6491
6492     elif self.op.mode == constants.INSTANCE_CREATE:
6493       if self.op.os_type is None:
6494         raise errors.OpPrereqError("No guest OS specified",
6495                                    errors.ECODE_INVAL)
6496       if self.op.disk_template is None:
6497         raise errors.OpPrereqError("No disk template specified",
6498                                    errors.ECODE_INVAL)
6499
6500     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6501       # Check handshake to ensure both clusters have the same domain secret
6502       src_handshake = self.op.source_handshake
6503       if not src_handshake:
6504         raise errors.OpPrereqError("Missing source handshake",
6505                                    errors.ECODE_INVAL)
6506
6507       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6508                                                            src_handshake)
6509       if errmsg:
6510         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6511                                    errors.ECODE_INVAL)
6512
6513       # Load and check source CA
6514       self.source_x509_ca_pem = self.op.source_x509_ca
6515       if not self.source_x509_ca_pem:
6516         raise errors.OpPrereqError("Missing source X509 CA",
6517                                    errors.ECODE_INVAL)
6518
6519       try:
6520         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6521                                                     self._cds)
6522       except OpenSSL.crypto.Error, err:
6523         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6524                                    (err, ), errors.ECODE_INVAL)
6525
6526       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6527       if errcode is not None:
6528         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6529                                    errors.ECODE_INVAL)
6530
6531       self.source_x509_ca = cert
6532
6533       src_instance_name = self.op.source_instance_name
6534       if not src_instance_name:
6535         raise errors.OpPrereqError("Missing source instance name",
6536                                    errors.ECODE_INVAL)
6537
6538       self.source_instance_name = \
6539         utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6540
6541     else:
6542       raise errors.OpPrereqError("Invalid instance creation mode %r" %
6543                                  self.op.mode, errors.ECODE_INVAL)
6544
6545   def ExpandNames(self):
6546     """ExpandNames for CreateInstance.
6547
6548     Figure out the right locks for instance creation.
6549
6550     """
6551     self.needed_locks = {}
6552
6553     instance_name = self.op.instance_name
6554     # this is just a preventive check, but someone might still add this
6555     # instance in the meantime, and creation will fail at lock-add time
6556     if instance_name in self.cfg.GetInstanceList():
6557       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6558                                  instance_name, errors.ECODE_EXISTS)
6559
6560     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6561
6562     if self.op.iallocator:
6563       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6564     else:
6565       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6566       nodelist = [self.op.pnode]
6567       if self.op.snode is not None:
6568         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6569         nodelist.append(self.op.snode)
6570       self.needed_locks[locking.LEVEL_NODE] = nodelist
6571
6572     # in case of import lock the source node too
6573     if self.op.mode == constants.INSTANCE_IMPORT:
6574       src_node = self.op.src_node
6575       src_path = self.op.src_path
6576
6577       if src_path is None:
6578         self.op.src_path = src_path = self.op.instance_name
6579
6580       if src_node is None:
6581         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6582         self.op.src_node = None
6583         if os.path.isabs(src_path):
6584           raise errors.OpPrereqError("Importing an instance from an absolute"
6585                                      " path requires a source node option.",
6586                                      errors.ECODE_INVAL)
6587       else:
6588         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6589         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6590           self.needed_locks[locking.LEVEL_NODE].append(src_node)
6591         if not os.path.isabs(src_path):
6592           self.op.src_path = src_path = \
6593             utils.PathJoin(constants.EXPORT_DIR, src_path)
6594
6595   def _RunAllocator(self):
6596     """Run the allocator based on input opcode.
6597
6598     """
6599     nics = [n.ToDict() for n in self.nics]
6600     ial = IAllocator(self.cfg, self.rpc,
6601                      mode=constants.IALLOCATOR_MODE_ALLOC,
6602                      name=self.op.instance_name,
6603                      disk_template=self.op.disk_template,
6604                      tags=[],
6605                      os=self.op.os_type,
6606                      vcpus=self.be_full[constants.BE_VCPUS],
6607                      mem_size=self.be_full[constants.BE_MEMORY],
6608                      disks=self.disks,
6609                      nics=nics,
6610                      hypervisor=self.op.hypervisor,
6611                      )
6612
6613     ial.Run(self.op.iallocator)
6614
6615     if not ial.success:
6616       raise errors.OpPrereqError("Can't compute nodes using"
6617                                  " iallocator '%s': %s" %
6618                                  (self.op.iallocator, ial.info),
6619                                  errors.ECODE_NORES)
6620     if len(ial.result) != ial.required_nodes:
6621       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6622                                  " of nodes (%s), required %s" %
6623                                  (self.op.iallocator, len(ial.result),
6624                                   ial.required_nodes), errors.ECODE_FAULT)
6625     self.op.pnode = ial.result[0]
6626     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6627                  self.op.instance_name, self.op.iallocator,
6628                  utils.CommaJoin(ial.result))
6629     if ial.required_nodes == 2:
6630       self.op.snode = ial.result[1]
6631
6632   def BuildHooksEnv(self):
6633     """Build hooks env.
6634
6635     This runs on master, primary and secondary nodes of the instance.
6636
6637     """
6638     env = {
6639       "ADD_MODE": self.op.mode,
6640       }
6641     if self.op.mode == constants.INSTANCE_IMPORT:
6642       env["SRC_NODE"] = self.op.src_node
6643       env["SRC_PATH"] = self.op.src_path
6644       env["SRC_IMAGES"] = self.src_images
6645
6646     env.update(_BuildInstanceHookEnv(
6647       name=self.op.instance_name,
6648       primary_node=self.op.pnode,
6649       secondary_nodes=self.secondaries,
6650       status=self.op.start,
6651       os_type=self.op.os_type,
6652       memory=self.be_full[constants.BE_MEMORY],
6653       vcpus=self.be_full[constants.BE_VCPUS],
6654       nics=_NICListToTuple(self, self.nics),
6655       disk_template=self.op.disk_template,
6656       disks=[(d["size"], d["mode"]) for d in self.disks],
6657       bep=self.be_full,
6658       hvp=self.hv_full,
6659       hypervisor_name=self.op.hypervisor,
6660     ))
6661
6662     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6663           self.secondaries)
6664     return env, nl, nl
6665
6666   def _ReadExportInfo(self):
6667     """Reads the export information from disk.
6668
6669     It will override the opcode source node and path with the actual
6670     information, if these two were not specified before.
6671
6672     @return: the export information
6673
6674     """
6675     assert self.op.mode == constants.INSTANCE_IMPORT
6676
6677     src_node = self.op.src_node
6678     src_path = self.op.src_path
6679
6680     if src_node is None:
6681       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6682       exp_list = self.rpc.call_export_list(locked_nodes)
6683       found = False
6684       for node in exp_list:
6685         if exp_list[node].fail_msg:
6686           continue
6687         if src_path in exp_list[node].payload:
6688           found = True
6689           self.op.src_node = src_node = node
6690           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6691                                                        src_path)
6692           break
6693       if not found:
6694         raise errors.OpPrereqError("No export found for relative path %s" %
6695                                     src_path, errors.ECODE_INVAL)
6696
6697     _CheckNodeOnline(self, src_node)
6698     result = self.rpc.call_export_info(src_node, src_path)
6699     result.Raise("No export or invalid export found in dir %s" % src_path)
6700
6701     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6702     if not export_info.has_section(constants.INISECT_EXP):
6703       raise errors.ProgrammerError("Corrupted export config",
6704                                    errors.ECODE_ENVIRON)
6705
6706     ei_version = export_info.get(constants.INISECT_EXP, "version")
6707     if (int(ei_version) != constants.EXPORT_VERSION):
6708       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6709                                  (ei_version, constants.EXPORT_VERSION),
6710                                  errors.ECODE_ENVIRON)
6711     return export_info
6712
6713   def _ReadExportParams(self, einfo):
6714     """Use export parameters as defaults.
6715
6716     In case the opcode doesn't specify (as in override) some instance
6717     parameters, then try to use them from the export information, if
6718     that declares them.
6719
6720     """
6721     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6722
6723     if self.op.disk_template is None:
6724       if einfo.has_option(constants.INISECT_INS, "disk_template"):
6725         self.op.disk_template = einfo.get(constants.INISECT_INS,
6726                                           "disk_template")
6727       else:
6728         raise errors.OpPrereqError("No disk template specified and the export"
6729                                    " is missing the disk_template information",
6730                                    errors.ECODE_INVAL)
6731
6732     if not self.op.disks:
6733       if einfo.has_option(constants.INISECT_INS, "disk_count"):
6734         disks = []
6735         # TODO: import the disk iv_name too
6736         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6737           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6738           disks.append({"size": disk_sz})
6739         self.op.disks = disks
6740       else:
6741         raise errors.OpPrereqError("No disk info specified and the export"
6742                                    " is missing the disk information",
6743                                    errors.ECODE_INVAL)
6744
6745     if (not self.op.nics and
6746         einfo.has_option(constants.INISECT_INS, "nic_count")):
6747       nics = []
6748       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6749         ndict = {}
6750         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6751           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6752           ndict[name] = v
6753         nics.append(ndict)
6754       self.op.nics = nics
6755
6756     if (self.op.hypervisor is None and
6757         einfo.has_option(constants.INISECT_INS, "hypervisor")):
6758       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6759     if einfo.has_section(constants.INISECT_HYP):
6760       # use the export parameters but do not override the ones
6761       # specified by the user
6762       for name, value in einfo.items(constants.INISECT_HYP):
6763         if name not in self.op.hvparams:
6764           self.op.hvparams[name] = value
6765
6766     if einfo.has_section(constants.INISECT_BEP):
6767       # use the parameters, without overriding
6768       for name, value in einfo.items(constants.INISECT_BEP):
6769         if name not in self.op.beparams:
6770           self.op.beparams[name] = value
6771     else:
6772       # try to read the parameters old style, from the main section
6773       for name in constants.BES_PARAMETERS:
6774         if (name not in self.op.beparams and
6775             einfo.has_option(constants.INISECT_INS, name)):
6776           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6777
6778     if einfo.has_section(constants.INISECT_OSP):
6779       # use the parameters, without overriding
6780       for name, value in einfo.items(constants.INISECT_OSP):
6781         if name not in self.op.osparams:
6782           self.op.osparams[name] = value
6783
6784   def _RevertToDefaults(self, cluster):
6785     """Revert the instance parameters to the default values.
6786
6787     """
6788     # hvparams
6789     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6790     for name in self.op.hvparams.keys():
6791       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6792         del self.op.hvparams[name]
6793     # beparams
6794     be_defs = cluster.SimpleFillBE({})
6795     for name in self.op.beparams.keys():
6796       if name in be_defs and be_defs[name] == self.op.beparams[name]:
6797         del self.op.beparams[name]
6798     # nic params
6799     nic_defs = cluster.SimpleFillNIC({})
6800     for nic in self.op.nics:
6801       for name in constants.NICS_PARAMETERS:
6802         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6803           del nic[name]
6804     # osparams
6805     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6806     for name in self.op.osparams.keys():
6807       if name in os_defs and os_defs[name] == self.op.osparams[name]:
6808         del self.op.osparams[name]
6809
6810   def CheckPrereq(self):
6811     """Check prerequisites.
6812
6813     """
6814     if self.op.mode == constants.INSTANCE_IMPORT:
6815       export_info = self._ReadExportInfo()
6816       self._ReadExportParams(export_info)
6817
6818     _CheckDiskTemplate(self.op.disk_template)
6819
6820     if (not self.cfg.GetVGName() and
6821         self.op.disk_template not in constants.DTS_NOT_LVM):
6822       raise errors.OpPrereqError("Cluster does not support lvm-based"
6823                                  " instances", errors.ECODE_STATE)
6824
6825     if self.op.hypervisor is None:
6826       self.op.hypervisor = self.cfg.GetHypervisorType()
6827
6828     cluster = self.cfg.GetClusterInfo()
6829     enabled_hvs = cluster.enabled_hypervisors
6830     if self.op.hypervisor not in enabled_hvs:
6831       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6832                                  " cluster (%s)" % (self.op.hypervisor,
6833                                   ",".join(enabled_hvs)),
6834                                  errors.ECODE_STATE)
6835
6836     # check hypervisor parameter syntax (locally)
6837     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6838     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6839                                       self.op.hvparams)
6840     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6841     hv_type.CheckParameterSyntax(filled_hvp)
6842     self.hv_full = filled_hvp
6843     # check that we don't specify global parameters on an instance
6844     _CheckGlobalHvParams(self.op.hvparams)
6845
6846     # fill and remember the beparams dict
6847     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6848     self.be_full = cluster.SimpleFillBE(self.op.beparams)
6849
6850     # build os parameters
6851     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6852
6853     # now that hvp/bep are in final format, let's reset to defaults,
6854     # if told to do so
6855     if self.op.identify_defaults:
6856       self._RevertToDefaults(cluster)
6857
6858     # NIC buildup
6859     self.nics = []
6860     for idx, nic in enumerate(self.op.nics):
6861       nic_mode_req = nic.get("mode", None)
6862       nic_mode = nic_mode_req
6863       if nic_mode is None:
6864         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6865
6866       # in routed mode, for the first nic, the default ip is 'auto'
6867       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6868         default_ip_mode = constants.VALUE_AUTO
6869       else:
6870         default_ip_mode = constants.VALUE_NONE
6871
6872       # ip validity checks
6873       ip = nic.get("ip", default_ip_mode)
6874       if ip is None or ip.lower() == constants.VALUE_NONE:
6875         nic_ip = None
6876       elif ip.lower() == constants.VALUE_AUTO:
6877         if not self.op.name_check:
6878           raise errors.OpPrereqError("IP address set to auto but name checks"
6879                                      " have been skipped. Aborting.",
6880                                      errors.ECODE_INVAL)
6881         nic_ip = self.hostname1.ip
6882       else:
6883         if not utils.IsValidIP4(ip):
6884           raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6885                                      " like a valid IP" % ip,
6886                                      errors.ECODE_INVAL)
6887         nic_ip = ip
6888
6889       # TODO: check the ip address for uniqueness
6890       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6891         raise errors.OpPrereqError("Routed nic mode requires an ip address",
6892                                    errors.ECODE_INVAL)
6893
6894       # MAC address verification
6895       mac = nic.get("mac", constants.VALUE_AUTO)
6896       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6897         mac = utils.NormalizeAndValidateMac(mac)
6898
6899         try:
6900           self.cfg.ReserveMAC(mac, self.proc.GetECId())
6901         except errors.ReservationError:
6902           raise errors.OpPrereqError("MAC address %s already in use"
6903                                      " in cluster" % mac,
6904                                      errors.ECODE_NOTUNIQUE)
6905
6906       # bridge verification
6907       bridge = nic.get("bridge", None)
6908       link = nic.get("link", None)
6909       if bridge and link:
6910         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6911                                    " at the same time", errors.ECODE_INVAL)
6912       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6913         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6914                                    errors.ECODE_INVAL)
6915       elif bridge:
6916         link = bridge
6917
6918       nicparams = {}
6919       if nic_mode_req:
6920         nicparams[constants.NIC_MODE] = nic_mode_req
6921       if link:
6922         nicparams[constants.NIC_LINK] = link
6923
6924       check_params = cluster.SimpleFillNIC(nicparams)
6925       objects.NIC.CheckParameterSyntax(check_params)
6926       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6927
6928     # disk checks/pre-build
6929     self.disks = []
6930     for disk in self.op.disks:
6931       mode = disk.get("mode", constants.DISK_RDWR)
6932       if mode not in constants.DISK_ACCESS_SET:
6933         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6934                                    mode, errors.ECODE_INVAL)
6935       size = disk.get("size", None)
6936       if size is None:
6937         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6938       try:
6939         size = int(size)
6940       except (TypeError, ValueError):
6941         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6942                                    errors.ECODE_INVAL)
6943       new_disk = {"size": size, "mode": mode}
6944       if "adopt" in disk:
6945         new_disk["adopt"] = disk["adopt"]
6946       self.disks.append(new_disk)
6947
6948     if self.op.mode == constants.INSTANCE_IMPORT:
6949
6950       # Check that the new instance doesn't have less disks than the export
6951       instance_disks = len(self.disks)
6952       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6953       if instance_disks < export_disks:
6954         raise errors.OpPrereqError("Not enough disks to import."
6955                                    " (instance: %d, export: %d)" %
6956                                    (instance_disks, export_disks),
6957                                    errors.ECODE_INVAL)
6958
6959       disk_images = []
6960       for idx in range(export_disks):
6961         option = 'disk%d_dump' % idx
6962         if export_info.has_option(constants.INISECT_INS, option):
6963           # FIXME: are the old os-es, disk sizes, etc. useful?
6964           export_name = export_info.get(constants.INISECT_INS, option)
6965           image = utils.PathJoin(self.op.src_path, export_name)
6966           disk_images.append(image)
6967         else:
6968           disk_images.append(False)
6969
6970       self.src_images = disk_images
6971
6972       old_name = export_info.get(constants.INISECT_INS, 'name')
6973       try:
6974         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6975       except (TypeError, ValueError), err:
6976         raise errors.OpPrereqError("Invalid export file, nic_count is not"
6977                                    " an integer: %s" % str(err),
6978                                    errors.ECODE_STATE)
6979       if self.op.instance_name == old_name:
6980         for idx, nic in enumerate(self.nics):
6981           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6982             nic_mac_ini = 'nic%d_mac' % idx
6983             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6984
6985     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6986
6987     # ip ping checks (we use the same ip that was resolved in ExpandNames)
6988     if self.op.ip_check:
6989       if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6990         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6991                                    (self.check_ip, self.op.instance_name),
6992                                    errors.ECODE_NOTUNIQUE)
6993
6994     #### mac address generation
6995     # By generating here the mac address both the allocator and the hooks get
6996     # the real final mac address rather than the 'auto' or 'generate' value.
6997     # There is a race condition between the generation and the instance object
6998     # creation, which means that we know the mac is valid now, but we're not
6999     # sure it will be when we actually add the instance. If things go bad
7000     # adding the instance will abort because of a duplicate mac, and the
7001     # creation job will fail.
7002     for nic in self.nics:
7003       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7004         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7005
7006     #### allocator run
7007
7008     if self.op.iallocator is not None:
7009       self._RunAllocator()
7010
7011     #### node related checks
7012
7013     # check primary node
7014     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7015     assert self.pnode is not None, \
7016       "Cannot retrieve locked node %s" % self.op.pnode
7017     if pnode.offline:
7018       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7019                                  pnode.name, errors.ECODE_STATE)
7020     if pnode.drained:
7021       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7022                                  pnode.name, errors.ECODE_STATE)
7023
7024     self.secondaries = []
7025
7026     # mirror node verification
7027     if self.op.disk_template in constants.DTS_NET_MIRROR:
7028       if self.op.snode is None:
7029         raise errors.OpPrereqError("The networked disk templates need"
7030                                    " a mirror node", errors.ECODE_INVAL)
7031       if self.op.snode == pnode.name:
7032         raise errors.OpPrereqError("The secondary node cannot be the"
7033                                    " primary node.", errors.ECODE_INVAL)
7034       _CheckNodeOnline(self, self.op.snode)
7035       _CheckNodeNotDrained(self, self.op.snode)
7036       self.secondaries.append(self.op.snode)
7037
7038     nodenames = [pnode.name] + self.secondaries
7039
7040     req_size = _ComputeDiskSize(self.op.disk_template,
7041                                 self.disks)
7042
7043     # Check lv size requirements, if not adopting
7044     if req_size is not None and not self.adopt_disks:
7045       _CheckNodesFreeDisk(self, nodenames, req_size)
7046
7047     if self.adopt_disks: # instead, we must check the adoption data
7048       all_lvs = set([i["adopt"] for i in self.disks])
7049       if len(all_lvs) != len(self.disks):
7050         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7051                                    errors.ECODE_INVAL)
7052       for lv_name in all_lvs:
7053         try:
7054           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7055         except errors.ReservationError:
7056           raise errors.OpPrereqError("LV named %s used by another instance" %
7057                                      lv_name, errors.ECODE_NOTUNIQUE)
7058
7059       node_lvs = self.rpc.call_lv_list([pnode.name],
7060                                        self.cfg.GetVGName())[pnode.name]
7061       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7062       node_lvs = node_lvs.payload
7063       delta = all_lvs.difference(node_lvs.keys())
7064       if delta:
7065         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7066                                    utils.CommaJoin(delta),
7067                                    errors.ECODE_INVAL)
7068       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7069       if online_lvs:
7070         raise errors.OpPrereqError("Online logical volumes found, cannot"
7071                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7072                                    errors.ECODE_STATE)
7073       # update the size of disk based on what is found
7074       for dsk in self.disks:
7075         dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7076
7077     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7078
7079     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7080     # check OS parameters (remotely)
7081     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7082
7083     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7084
7085     # memory check on primary node
7086     if self.op.start:
7087       _CheckNodeFreeMemory(self, self.pnode.name,
7088                            "creating instance %s" % self.op.instance_name,
7089                            self.be_full[constants.BE_MEMORY],
7090                            self.op.hypervisor)
7091
7092     self.dry_run_result = list(nodenames)
7093
7094   def Exec(self, feedback_fn):
7095     """Create and add the instance to the cluster.
7096
7097     """
7098     instance = self.op.instance_name
7099     pnode_name = self.pnode.name
7100
7101     ht_kind = self.op.hypervisor
7102     if ht_kind in constants.HTS_REQ_PORT:
7103       network_port = self.cfg.AllocatePort()
7104     else:
7105       network_port = None
7106
7107     if constants.ENABLE_FILE_STORAGE:
7108       # this is needed because os.path.join does not accept None arguments
7109       if self.op.file_storage_dir is None:
7110         string_file_storage_dir = ""
7111       else:
7112         string_file_storage_dir = self.op.file_storage_dir
7113
7114       # build the full file storage dir path
7115       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7116                                         string_file_storage_dir, instance)
7117     else:
7118       file_storage_dir = ""
7119
7120     disks = _GenerateDiskTemplate(self,
7121                                   self.op.disk_template,
7122                                   instance, pnode_name,
7123                                   self.secondaries,
7124                                   self.disks,
7125                                   file_storage_dir,
7126                                   self.op.file_driver,
7127                                   0)
7128
7129     iobj = objects.Instance(name=instance, os=self.op.os_type,
7130                             primary_node=pnode_name,
7131                             nics=self.nics, disks=disks,
7132                             disk_template=self.op.disk_template,
7133                             admin_up=False,
7134                             network_port=network_port,
7135                             beparams=self.op.beparams,
7136                             hvparams=self.op.hvparams,
7137                             hypervisor=self.op.hypervisor,
7138                             osparams=self.op.osparams,
7139                             )
7140
7141     if self.adopt_disks:
7142       # rename LVs to the newly-generated names; we need to construct
7143       # 'fake' LV disks with the old data, plus the new unique_id
7144       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7145       rename_to = []
7146       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7147         rename_to.append(t_dsk.logical_id)
7148         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7149         self.cfg.SetDiskID(t_dsk, pnode_name)
7150       result = self.rpc.call_blockdev_rename(pnode_name,
7151                                              zip(tmp_disks, rename_to))
7152       result.Raise("Failed to rename adoped LVs")
7153     else:
7154       feedback_fn("* creating instance disks...")
7155       try:
7156         _CreateDisks(self, iobj)
7157       except errors.OpExecError:
7158         self.LogWarning("Device creation failed, reverting...")
7159         try:
7160           _RemoveDisks(self, iobj)
7161         finally:
7162           self.cfg.ReleaseDRBDMinors(instance)
7163           raise
7164
7165     feedback_fn("adding instance %s to cluster config" % instance)
7166
7167     self.cfg.AddInstance(iobj, self.proc.GetECId())
7168
7169     # Declare that we don't want to remove the instance lock anymore, as we've
7170     # added the instance to the config
7171     del self.remove_locks[locking.LEVEL_INSTANCE]
7172     # Unlock all the nodes
7173     if self.op.mode == constants.INSTANCE_IMPORT:
7174       nodes_keep = [self.op.src_node]
7175       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7176                        if node != self.op.src_node]
7177       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7178       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7179     else:
7180       self.context.glm.release(locking.LEVEL_NODE)
7181       del self.acquired_locks[locking.LEVEL_NODE]
7182
7183     if self.op.wait_for_sync:
7184       disk_abort = not _WaitForSync(self, iobj)
7185     elif iobj.disk_template in constants.DTS_NET_MIRROR:
7186       # make sure the disks are not degraded (still sync-ing is ok)
7187       time.sleep(15)
7188       feedback_fn("* checking mirrors status")
7189       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7190     else:
7191       disk_abort = False
7192
7193     if disk_abort:
7194       _RemoveDisks(self, iobj)
7195       self.cfg.RemoveInstance(iobj.name)
7196       # Make sure the instance lock gets removed
7197       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7198       raise errors.OpExecError("There are some degraded disks for"
7199                                " this instance")
7200
7201     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7202       if self.op.mode == constants.INSTANCE_CREATE:
7203         if not self.op.no_install:
7204           feedback_fn("* running the instance OS create scripts...")
7205           # FIXME: pass debug option from opcode to backend
7206           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7207                                                  self.op.debug_level)
7208           result.Raise("Could not add os for instance %s"
7209                        " on node %s" % (instance, pnode_name))
7210
7211       elif self.op.mode == constants.INSTANCE_IMPORT:
7212         feedback_fn("* running the instance OS import scripts...")
7213
7214         transfers = []
7215
7216         for idx, image in enumerate(self.src_images):
7217           if not image:
7218             continue
7219
7220           # FIXME: pass debug option from opcode to backend
7221           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7222                                              constants.IEIO_FILE, (image, ),
7223                                              constants.IEIO_SCRIPT,
7224                                              (iobj.disks[idx], idx),
7225                                              None)
7226           transfers.append(dt)
7227
7228         import_result = \
7229           masterd.instance.TransferInstanceData(self, feedback_fn,
7230                                                 self.op.src_node, pnode_name,
7231                                                 self.pnode.secondary_ip,
7232                                                 iobj, transfers)
7233         if not compat.all(import_result):
7234           self.LogWarning("Some disks for instance %s on node %s were not"
7235                           " imported successfully" % (instance, pnode_name))
7236
7237       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7238         feedback_fn("* preparing remote import...")
7239         connect_timeout = constants.RIE_CONNECT_TIMEOUT
7240         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7241
7242         disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7243                                                      self.source_x509_ca,
7244                                                      self._cds, timeouts)
7245         if not compat.all(disk_results):
7246           # TODO: Should the instance still be started, even if some disks
7247           # failed to import (valid for local imports, too)?
7248           self.LogWarning("Some disks for instance %s on node %s were not"
7249                           " imported successfully" % (instance, pnode_name))
7250
7251         # Run rename script on newly imported instance
7252         assert iobj.name == instance
7253         feedback_fn("Running rename script for %s" % instance)
7254         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7255                                                    self.source_instance_name,
7256                                                    self.op.debug_level)
7257         if result.fail_msg:
7258           self.LogWarning("Failed to run rename script for %s on node"
7259                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7260
7261       else:
7262         # also checked in the prereq part
7263         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7264                                      % self.op.mode)
7265
7266     if self.op.start:
7267       iobj.admin_up = True
7268       self.cfg.Update(iobj, feedback_fn)
7269       logging.info("Starting instance %s on node %s", instance, pnode_name)
7270       feedback_fn("* starting instance...")
7271       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7272       result.Raise("Could not start instance")
7273
7274     return list(iobj.all_nodes)
7275
7276
7277 class LUConnectConsole(NoHooksLU):
7278   """Connect to an instance's console.
7279
7280   This is somewhat special in that it returns the command line that
7281   you need to run on the master node in order to connect to the
7282   console.
7283
7284   """
7285   _OP_PARAMS = [
7286     _PInstanceName
7287     ]
7288   REQ_BGL = False
7289
7290   def ExpandNames(self):
7291     self._ExpandAndLockInstance()
7292
7293   def CheckPrereq(self):
7294     """Check prerequisites.
7295
7296     This checks that the instance is in the cluster.
7297
7298     """
7299     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7300     assert self.instance is not None, \
7301       "Cannot retrieve locked instance %s" % self.op.instance_name
7302     _CheckNodeOnline(self, self.instance.primary_node)
7303
7304   def Exec(self, feedback_fn):
7305     """Connect to the console of an instance
7306
7307     """
7308     instance = self.instance
7309     node = instance.primary_node
7310
7311     node_insts = self.rpc.call_instance_list([node],
7312                                              [instance.hypervisor])[node]
7313     node_insts.Raise("Can't get node information from %s" % node)
7314
7315     if instance.name not in node_insts.payload:
7316       raise errors.OpExecError("Instance %s is not running." % instance.name)
7317
7318     logging.debug("Connecting to console of %s on %s", instance.name, node)
7319
7320     hyper = hypervisor.GetHypervisor(instance.hypervisor)
7321     cluster = self.cfg.GetClusterInfo()
7322     # beparams and hvparams are passed separately, to avoid editing the
7323     # instance and then saving the defaults in the instance itself.
7324     hvparams = cluster.FillHV(instance)
7325     beparams = cluster.FillBE(instance)
7326     console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7327
7328     # build ssh cmdline
7329     return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7330
7331
7332 class LUReplaceDisks(LogicalUnit):
7333   """Replace the disks of an instance.
7334
7335   """
7336   HPATH = "mirrors-replace"
7337   HTYPE = constants.HTYPE_INSTANCE
7338   _OP_PARAMS = [
7339     _PInstanceName,
7340     ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7341     ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7342     ("remote_node", None, _TMaybeString),
7343     ("iallocator", None, _TMaybeString),
7344     ("early_release", False, _TBool),
7345     ]
7346   REQ_BGL = False
7347
7348   def CheckArguments(self):
7349     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7350                                   self.op.iallocator)
7351
7352   def ExpandNames(self):
7353     self._ExpandAndLockInstance()
7354
7355     if self.op.iallocator is not None:
7356       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7357
7358     elif self.op.remote_node is not None:
7359       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7360       self.op.remote_node = remote_node
7361
7362       # Warning: do not remove the locking of the new secondary here
7363       # unless DRBD8.AddChildren is changed to work in parallel;
7364       # currently it doesn't since parallel invocations of
7365       # FindUnusedMinor will conflict
7366       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7367       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7368
7369     else:
7370       self.needed_locks[locking.LEVEL_NODE] = []
7371       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7372
7373     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7374                                    self.op.iallocator, self.op.remote_node,
7375                                    self.op.disks, False, self.op.early_release)
7376
7377     self.tasklets = [self.replacer]
7378
7379   def DeclareLocks(self, level):
7380     # If we're not already locking all nodes in the set we have to declare the
7381     # instance's primary/secondary nodes.
7382     if (level == locking.LEVEL_NODE and
7383         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7384       self._LockInstancesNodes()
7385
7386   def BuildHooksEnv(self):
7387     """Build hooks env.
7388
7389     This runs on the master, the primary and all the secondaries.
7390
7391     """
7392     instance = self.replacer.instance
7393     env = {
7394       "MODE": self.op.mode,
7395       "NEW_SECONDARY": self.op.remote_node,
7396       "OLD_SECONDARY": instance.secondary_nodes[0],
7397       }
7398     env.update(_BuildInstanceHookEnvByObject(self, instance))
7399     nl = [
7400       self.cfg.GetMasterNode(),
7401       instance.primary_node,
7402       ]
7403     if self.op.remote_node is not None:
7404       nl.append(self.op.remote_node)
7405     return env, nl, nl
7406
7407
7408 class TLReplaceDisks(Tasklet):
7409   """Replaces disks for an instance.
7410
7411   Note: Locking is not within the scope of this class.
7412
7413   """
7414   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7415                disks, delay_iallocator, early_release):
7416     """Initializes this class.
7417
7418     """
7419     Tasklet.__init__(self, lu)
7420
7421     # Parameters
7422     self.instance_name = instance_name
7423     self.mode = mode
7424     self.iallocator_name = iallocator_name
7425     self.remote_node = remote_node
7426     self.disks = disks
7427     self.delay_iallocator = delay_iallocator
7428     self.early_release = early_release
7429
7430     # Runtime data
7431     self.instance = None
7432     self.new_node = None
7433     self.target_node = None
7434     self.other_node = None
7435     self.remote_node_info = None
7436     self.node_secondary_ip = None
7437
7438   @staticmethod
7439   def CheckArguments(mode, remote_node, iallocator):
7440     """Helper function for users of this class.
7441
7442     """
7443     # check for valid parameter combination
7444     if mode == constants.REPLACE_DISK_CHG:
7445       if remote_node is None and iallocator is None:
7446         raise errors.OpPrereqError("When changing the secondary either an"
7447                                    " iallocator script must be used or the"
7448                                    " new node given", errors.ECODE_INVAL)
7449
7450       if remote_node is not None and iallocator is not None:
7451         raise errors.OpPrereqError("Give either the iallocator or the new"
7452                                    " secondary, not both", errors.ECODE_INVAL)
7453
7454     elif remote_node is not None or iallocator is not None:
7455       # Not replacing the secondary
7456       raise errors.OpPrereqError("The iallocator and new node options can"
7457                                  " only be used when changing the"
7458                                  " secondary node", errors.ECODE_INVAL)
7459
7460   @staticmethod
7461   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7462     """Compute a new secondary node using an IAllocator.
7463
7464     """
7465     ial = IAllocator(lu.cfg, lu.rpc,
7466                      mode=constants.IALLOCATOR_MODE_RELOC,
7467                      name=instance_name,
7468                      relocate_from=relocate_from)
7469
7470     ial.Run(iallocator_name)
7471
7472     if not ial.success:
7473       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7474                                  " %s" % (iallocator_name, ial.info),
7475                                  errors.ECODE_NORES)
7476
7477     if len(ial.result) != ial.required_nodes:
7478       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7479                                  " of nodes (%s), required %s" %
7480                                  (iallocator_name,
7481                                   len(ial.result), ial.required_nodes),
7482                                  errors.ECODE_FAULT)
7483
7484     remote_node_name = ial.result[0]
7485
7486     lu.LogInfo("Selected new secondary for instance '%s': %s",
7487                instance_name, remote_node_name)
7488
7489     return remote_node_name
7490
7491   def _FindFaultyDisks(self, node_name):
7492     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7493                                     node_name, True)
7494
7495   def CheckPrereq(self):
7496     """Check prerequisites.
7497
7498     This checks that the instance is in the cluster.
7499
7500     """
7501     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7502     assert instance is not None, \
7503       "Cannot retrieve locked instance %s" % self.instance_name
7504
7505     if instance.disk_template != constants.DT_DRBD8:
7506       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7507                                  " instances", errors.ECODE_INVAL)
7508
7509     if len(instance.secondary_nodes) != 1:
7510       raise errors.OpPrereqError("The instance has a strange layout,"
7511                                  " expected one secondary but found %d" %
7512                                  len(instance.secondary_nodes),
7513                                  errors.ECODE_FAULT)
7514
7515     if not self.delay_iallocator:
7516       self._CheckPrereq2()
7517
7518   def _CheckPrereq2(self):
7519     """Check prerequisites, second part.
7520
7521     This function should always be part of CheckPrereq. It was separated and is
7522     now called from Exec because during node evacuation iallocator was only
7523     called with an unmodified cluster model, not taking planned changes into
7524     account.
7525
7526     """
7527     instance = self.instance
7528     secondary_node = instance.secondary_nodes[0]
7529
7530     if self.iallocator_name is None:
7531       remote_node = self.remote_node
7532     else:
7533       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7534                                        instance.name, instance.secondary_nodes)
7535
7536     if remote_node is not None:
7537       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7538       assert self.remote_node_info is not None, \
7539         "Cannot retrieve locked node %s" % remote_node
7540     else:
7541       self.remote_node_info = None
7542
7543     if remote_node == self.instance.primary_node:
7544       raise errors.OpPrereqError("The specified node is the primary node of"
7545                                  " the instance.", errors.ECODE_INVAL)
7546
7547     if remote_node == secondary_node:
7548       raise errors.OpPrereqError("The specified node is already the"
7549                                  " secondary node of the instance.",
7550                                  errors.ECODE_INVAL)
7551
7552     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7553                                     constants.REPLACE_DISK_CHG):
7554       raise errors.OpPrereqError("Cannot specify disks to be replaced",
7555                                  errors.ECODE_INVAL)
7556
7557     if self.mode == constants.REPLACE_DISK_AUTO:
7558       faulty_primary = self._FindFaultyDisks(instance.primary_node)
7559       faulty_secondary = self._FindFaultyDisks(secondary_node)
7560
7561       if faulty_primary and faulty_secondary:
7562         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7563                                    " one node and can not be repaired"
7564                                    " automatically" % self.instance_name,
7565                                    errors.ECODE_STATE)
7566
7567       if faulty_primary:
7568         self.disks = faulty_primary
7569         self.target_node = instance.primary_node
7570         self.other_node = secondary_node
7571         check_nodes = [self.target_node, self.other_node]
7572       elif faulty_secondary:
7573         self.disks = faulty_secondary
7574         self.target_node = secondary_node
7575         self.other_node = instance.primary_node
7576         check_nodes = [self.target_node, self.other_node]
7577       else:
7578         self.disks = []
7579         check_nodes = []
7580
7581     else:
7582       # Non-automatic modes
7583       if self.mode == constants.REPLACE_DISK_PRI:
7584         self.target_node = instance.primary_node
7585         self.other_node = secondary_node
7586         check_nodes = [self.target_node, self.other_node]
7587
7588       elif self.mode == constants.REPLACE_DISK_SEC:
7589         self.target_node = secondary_node
7590         self.other_node = instance.primary_node
7591         check_nodes = [self.target_node, self.other_node]
7592
7593       elif self.mode == constants.REPLACE_DISK_CHG:
7594         self.new_node = remote_node
7595         self.other_node = instance.primary_node
7596         self.target_node = secondary_node
7597         check_nodes = [self.new_node, self.other_node]
7598
7599         _CheckNodeNotDrained(self.lu, remote_node)
7600
7601         old_node_info = self.cfg.GetNodeInfo(secondary_node)
7602         assert old_node_info is not None
7603         if old_node_info.offline and not self.early_release:
7604           # doesn't make sense to delay the release
7605           self.early_release = True
7606           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7607                           " early-release mode", secondary_node)
7608
7609       else:
7610         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7611                                      self.mode)
7612
7613       # If not specified all disks should be replaced
7614       if not self.disks:
7615         self.disks = range(len(self.instance.disks))
7616
7617     for node in check_nodes:
7618       _CheckNodeOnline(self.lu, node)
7619
7620     # Check whether disks are valid
7621     for disk_idx in self.disks:
7622       instance.FindDisk(disk_idx)
7623
7624     # Get secondary node IP addresses
7625     node_2nd_ip = {}
7626
7627     for node_name in [self.target_node, self.other_node, self.new_node]:
7628       if node_name is not None:
7629         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7630
7631     self.node_secondary_ip = node_2nd_ip
7632
7633   def Exec(self, feedback_fn):
7634     """Execute disk replacement.
7635
7636     This dispatches the disk replacement to the appropriate handler.
7637
7638     """
7639     if self.delay_iallocator:
7640       self._CheckPrereq2()
7641
7642     if not self.disks:
7643       feedback_fn("No disks need replacement")
7644       return
7645
7646     feedback_fn("Replacing disk(s) %s for %s" %
7647                 (utils.CommaJoin(self.disks), self.instance.name))
7648
7649     activate_disks = (not self.instance.admin_up)
7650
7651     # Activate the instance disks if we're replacing them on a down instance
7652     if activate_disks:
7653       _StartInstanceDisks(self.lu, self.instance, True)
7654
7655     try:
7656       # Should we replace the secondary node?
7657       if self.new_node is not None:
7658         fn = self._ExecDrbd8Secondary
7659       else:
7660         fn = self._ExecDrbd8DiskOnly
7661
7662       return fn(feedback_fn)
7663
7664     finally:
7665       # Deactivate the instance disks if we're replacing them on a
7666       # down instance
7667       if activate_disks:
7668         _SafeShutdownInstanceDisks(self.lu, self.instance)
7669
7670   def _CheckVolumeGroup(self, nodes):
7671     self.lu.LogInfo("Checking volume groups")
7672
7673     vgname = self.cfg.GetVGName()
7674
7675     # Make sure volume group exists on all involved nodes
7676     results = self.rpc.call_vg_list(nodes)
7677     if not results:
7678       raise errors.OpExecError("Can't list volume groups on the nodes")
7679
7680     for node in nodes:
7681       res = results[node]
7682       res.Raise("Error checking node %s" % node)
7683       if vgname not in res.payload:
7684         raise errors.OpExecError("Volume group '%s' not found on node %s" %
7685                                  (vgname, node))
7686
7687   def _CheckDisksExistence(self, nodes):
7688     # Check disk existence
7689     for idx, dev in enumerate(self.instance.disks):
7690       if idx not in self.disks:
7691         continue
7692
7693       for node in nodes:
7694         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7695         self.cfg.SetDiskID(dev, node)
7696
7697         result = self.rpc.call_blockdev_find(node, dev)
7698
7699         msg = result.fail_msg
7700         if msg or not result.payload:
7701           if not msg:
7702             msg = "disk not found"
7703           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7704                                    (idx, node, msg))
7705
7706   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7707     for idx, dev in enumerate(self.instance.disks):
7708       if idx not in self.disks:
7709         continue
7710
7711       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7712                       (idx, node_name))
7713
7714       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7715                                    ldisk=ldisk):
7716         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7717                                  " replace disks for instance %s" %
7718                                  (node_name, self.instance.name))
7719
7720   def _CreateNewStorage(self, node_name):
7721     vgname = self.cfg.GetVGName()
7722     iv_names = {}
7723
7724     for idx, dev in enumerate(self.instance.disks):
7725       if idx not in self.disks:
7726         continue
7727
7728       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7729
7730       self.cfg.SetDiskID(dev, node_name)
7731
7732       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7733       names = _GenerateUniqueNames(self.lu, lv_names)
7734
7735       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7736                              logical_id=(vgname, names[0]))
7737       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7738                              logical_id=(vgname, names[1]))
7739
7740       new_lvs = [lv_data, lv_meta]
7741       old_lvs = dev.children
7742       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7743
7744       # we pass force_create=True to force the LVM creation
7745       for new_lv in new_lvs:
7746         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7747                         _GetInstanceInfoText(self.instance), False)
7748
7749     return iv_names
7750
7751   def _CheckDevices(self, node_name, iv_names):
7752     for name, (dev, _, _) in iv_names.iteritems():
7753       self.cfg.SetDiskID(dev, node_name)
7754
7755       result = self.rpc.call_blockdev_find(node_name, dev)
7756
7757       msg = result.fail_msg
7758       if msg or not result.payload:
7759         if not msg:
7760           msg = "disk not found"
7761         raise errors.OpExecError("Can't find DRBD device %s: %s" %
7762                                  (name, msg))
7763
7764       if result.payload.is_degraded:
7765         raise errors.OpExecError("DRBD device %s is degraded!" % name)
7766
7767   def _RemoveOldStorage(self, node_name, iv_names):
7768     for name, (_, old_lvs, _) in iv_names.iteritems():
7769       self.lu.LogInfo("Remove logical volumes for %s" % name)
7770
7771       for lv in old_lvs:
7772         self.cfg.SetDiskID(lv, node_name)
7773
7774         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7775         if msg:
7776           self.lu.LogWarning("Can't remove old LV: %s" % msg,
7777                              hint="remove unused LVs manually")
7778
7779   def _ReleaseNodeLock(self, node_name):
7780     """Releases the lock for a given node."""
7781     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7782
7783   def _ExecDrbd8DiskOnly(self, feedback_fn):
7784     """Replace a disk on the primary or secondary for DRBD 8.
7785
7786     The algorithm for replace is quite complicated:
7787
7788       1. for each disk to be replaced:
7789
7790         1. create new LVs on the target node with unique names
7791         1. detach old LVs from the drbd device
7792         1. rename old LVs to name_replaced.<time_t>
7793         1. rename new LVs to old LVs
7794         1. attach the new LVs (with the old names now) to the drbd device
7795
7796       1. wait for sync across all devices
7797
7798       1. for each modified disk:
7799
7800         1. remove old LVs (which have the name name_replaces.<time_t>)
7801
7802     Failures are not very well handled.
7803
7804     """
7805     steps_total = 6
7806
7807     # Step: check device activation
7808     self.lu.LogStep(1, steps_total, "Check device existence")
7809     self._CheckDisksExistence([self.other_node, self.target_node])
7810     self._CheckVolumeGroup([self.target_node, self.other_node])
7811
7812     # Step: check other node consistency
7813     self.lu.LogStep(2, steps_total, "Check peer consistency")
7814     self._CheckDisksConsistency(self.other_node,
7815                                 self.other_node == self.instance.primary_node,
7816                                 False)
7817
7818     # Step: create new storage
7819     self.lu.LogStep(3, steps_total, "Allocate new storage")
7820     iv_names = self._CreateNewStorage(self.target_node)
7821
7822     # Step: for each lv, detach+rename*2+attach
7823     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7824     for dev, old_lvs, new_lvs in iv_names.itervalues():
7825       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7826
7827       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7828                                                      old_lvs)
7829       result.Raise("Can't detach drbd from local storage on node"
7830                    " %s for device %s" % (self.target_node, dev.iv_name))
7831       #dev.children = []
7832       #cfg.Update(instance)
7833
7834       # ok, we created the new LVs, so now we know we have the needed
7835       # storage; as such, we proceed on the target node to rename
7836       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7837       # using the assumption that logical_id == physical_id (which in
7838       # turn is the unique_id on that node)
7839
7840       # FIXME(iustin): use a better name for the replaced LVs
7841       temp_suffix = int(time.time())
7842       ren_fn = lambda d, suff: (d.physical_id[0],
7843                                 d.physical_id[1] + "_replaced-%s" % suff)
7844
7845       # Build the rename list based on what LVs exist on the node
7846       rename_old_to_new = []
7847       for to_ren in old_lvs:
7848         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7849         if not result.fail_msg and result.payload:
7850           # device exists
7851           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7852
7853       self.lu.LogInfo("Renaming the old LVs on the target node")
7854       result = self.rpc.call_blockdev_rename(self.target_node,
7855                                              rename_old_to_new)
7856       result.Raise("Can't rename old LVs on node %s" % self.target_node)
7857
7858       # Now we rename the new LVs to the old LVs
7859       self.lu.LogInfo("Renaming the new LVs on the target node")
7860       rename_new_to_old = [(new, old.physical_id)
7861                            for old, new in zip(old_lvs, new_lvs)]
7862       result = self.rpc.call_blockdev_rename(self.target_node,
7863                                              rename_new_to_old)
7864       result.Raise("Can't rename new LVs on node %s" % self.target_node)
7865
7866       for old, new in zip(old_lvs, new_lvs):
7867         new.logical_id = old.logical_id
7868         self.cfg.SetDiskID(new, self.target_node)
7869
7870       for disk in old_lvs:
7871         disk.logical_id = ren_fn(disk, temp_suffix)
7872         self.cfg.SetDiskID(disk, self.target_node)
7873
7874       # Now that the new lvs have the old name, we can add them to the device
7875       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7876       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7877                                                   new_lvs)
7878       msg = result.fail_msg
7879       if msg:
7880         for new_lv in new_lvs:
7881           msg2 = self.rpc.call_blockdev_remove(self.target_node,
7882                                                new_lv).fail_msg
7883           if msg2:
7884             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7885                                hint=("cleanup manually the unused logical"
7886                                      "volumes"))
7887         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7888
7889       dev.children = new_lvs
7890
7891       self.cfg.Update(self.instance, feedback_fn)
7892
7893     cstep = 5
7894     if self.early_release:
7895       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7896       cstep += 1
7897       self._RemoveOldStorage(self.target_node, iv_names)
7898       # WARNING: we release both node locks here, do not do other RPCs
7899       # than WaitForSync to the primary node
7900       self._ReleaseNodeLock([self.target_node, self.other_node])
7901
7902     # Wait for sync
7903     # This can fail as the old devices are degraded and _WaitForSync
7904     # does a combined result over all disks, so we don't check its return value
7905     self.lu.LogStep(cstep, steps_total, "Sync devices")
7906     cstep += 1
7907     _WaitForSync(self.lu, self.instance)
7908
7909     # Check all devices manually
7910     self._CheckDevices(self.instance.primary_node, iv_names)
7911
7912     # Step: remove old storage
7913     if not self.early_release:
7914       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7915       cstep += 1
7916       self._RemoveOldStorage(self.target_node, iv_names)
7917
7918   def _ExecDrbd8Secondary(self, feedback_fn):
7919     """Replace the secondary node for DRBD 8.
7920
7921     The algorithm for replace is quite complicated:
7922       - for all disks of the instance:
7923         - create new LVs on the new node with same names
7924         - shutdown the drbd device on the old secondary
7925         - disconnect the drbd network on the primary
7926         - create the drbd device on the new secondary
7927         - network attach the drbd on the primary, using an artifice:
7928           the drbd code for Attach() will connect to the network if it
7929           finds a device which is connected to the good local disks but
7930           not network enabled
7931       - wait for sync across all devices
7932       - remove all disks from the old secondary
7933
7934     Failures are not very well handled.
7935
7936     """
7937     steps_total = 6
7938
7939     # Step: check device activation
7940     self.lu.LogStep(1, steps_total, "Check device existence")
7941     self._CheckDisksExistence([self.instance.primary_node])
7942     self._CheckVolumeGroup([self.instance.primary_node])
7943
7944     # Step: check other node consistency
7945     self.lu.LogStep(2, steps_total, "Check peer consistency")
7946     self._CheckDisksConsistency(self.instance.primary_node, True, True)
7947
7948     # Step: create new storage
7949     self.lu.LogStep(3, steps_total, "Allocate new storage")
7950     for idx, dev in enumerate(self.instance.disks):
7951       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7952                       (self.new_node, idx))
7953       # we pass force_create=True to force LVM creation
7954       for new_lv in dev.children:
7955         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7956                         _GetInstanceInfoText(self.instance), False)
7957
7958     # Step 4: dbrd minors and drbd setups changes
7959     # after this, we must manually remove the drbd minors on both the
7960     # error and the success paths
7961     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7962     minors = self.cfg.AllocateDRBDMinor([self.new_node
7963                                          for dev in self.instance.disks],
7964                                         self.instance.name)
7965     logging.debug("Allocated minors %r", minors)
7966
7967     iv_names = {}
7968     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7969       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7970                       (self.new_node, idx))
7971       # create new devices on new_node; note that we create two IDs:
7972       # one without port, so the drbd will be activated without
7973       # networking information on the new node at this stage, and one
7974       # with network, for the latter activation in step 4
7975       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7976       if self.instance.primary_node == o_node1:
7977         p_minor = o_minor1
7978       else:
7979         assert self.instance.primary_node == o_node2, "Three-node instance?"
7980         p_minor = o_minor2
7981
7982       new_alone_id = (self.instance.primary_node, self.new_node, None,
7983                       p_minor, new_minor, o_secret)
7984       new_net_id = (self.instance.primary_node, self.new_node, o_port,
7985                     p_minor, new_minor, o_secret)
7986
7987       iv_names[idx] = (dev, dev.children, new_net_id)
7988       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7989                     new_net_id)
7990       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7991                               logical_id=new_alone_id,
7992                               children=dev.children,
7993                               size=dev.size)
7994       try:
7995         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7996                               _GetInstanceInfoText(self.instance), False)
7997       except errors.GenericError:
7998         self.cfg.ReleaseDRBDMinors(self.instance.name)
7999         raise
8000
8001     # We have new devices, shutdown the drbd on the old secondary
8002     for idx, dev in enumerate(self.instance.disks):
8003       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8004       self.cfg.SetDiskID(dev, self.target_node)
8005       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8006       if msg:
8007         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8008                            "node: %s" % (idx, msg),
8009                            hint=("Please cleanup this device manually as"
8010                                  " soon as possible"))
8011
8012     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8013     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8014                                                self.node_secondary_ip,
8015                                                self.instance.disks)\
8016                                               [self.instance.primary_node]
8017
8018     msg = result.fail_msg
8019     if msg:
8020       # detaches didn't succeed (unlikely)
8021       self.cfg.ReleaseDRBDMinors(self.instance.name)
8022       raise errors.OpExecError("Can't detach the disks from the network on"
8023                                " old node: %s" % (msg,))
8024
8025     # if we managed to detach at least one, we update all the disks of
8026     # the instance to point to the new secondary
8027     self.lu.LogInfo("Updating instance configuration")
8028     for dev, _, new_logical_id in iv_names.itervalues():
8029       dev.logical_id = new_logical_id
8030       self.cfg.SetDiskID(dev, self.instance.primary_node)
8031
8032     self.cfg.Update(self.instance, feedback_fn)
8033
8034     # and now perform the drbd attach
8035     self.lu.LogInfo("Attaching primary drbds to new secondary"
8036                     " (standalone => connected)")
8037     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8038                                             self.new_node],
8039                                            self.node_secondary_ip,
8040                                            self.instance.disks,
8041                                            self.instance.name,
8042                                            False)
8043     for to_node, to_result in result.items():
8044       msg = to_result.fail_msg
8045       if msg:
8046         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8047                            to_node, msg,
8048                            hint=("please do a gnt-instance info to see the"
8049                                  " status of disks"))
8050     cstep = 5
8051     if self.early_release:
8052       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8053       cstep += 1
8054       self._RemoveOldStorage(self.target_node, iv_names)
8055       # WARNING: we release all node locks here, do not do other RPCs
8056       # than WaitForSync to the primary node
8057       self._ReleaseNodeLock([self.instance.primary_node,
8058                              self.target_node,
8059                              self.new_node])
8060
8061     # Wait for sync
8062     # This can fail as the old devices are degraded and _WaitForSync
8063     # does a combined result over all disks, so we don't check its return value
8064     self.lu.LogStep(cstep, steps_total, "Sync devices")
8065     cstep += 1
8066     _WaitForSync(self.lu, self.instance)
8067
8068     # Check all devices manually
8069     self._CheckDevices(self.instance.primary_node, iv_names)
8070
8071     # Step: remove old storage
8072     if not self.early_release:
8073       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8074       self._RemoveOldStorage(self.target_node, iv_names)
8075
8076
8077 class LURepairNodeStorage(NoHooksLU):
8078   """Repairs the volume group on a node.
8079
8080   """
8081   _OP_PARAMS = [
8082     _PNodeName,
8083     ("storage_type", _NoDefault, _CheckStorageType),
8084     ("name", _NoDefault, _TNonEmptyString),
8085     ("ignore_consistency", False, _TBool),
8086     ]
8087   REQ_BGL = False
8088
8089   def CheckArguments(self):
8090     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8091
8092     storage_type = self.op.storage_type
8093
8094     if (constants.SO_FIX_CONSISTENCY not in
8095         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8096       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8097                                  " repaired" % storage_type,
8098                                  errors.ECODE_INVAL)
8099
8100   def ExpandNames(self):
8101     self.needed_locks = {
8102       locking.LEVEL_NODE: [self.op.node_name],
8103       }
8104
8105   def _CheckFaultyDisks(self, instance, node_name):
8106     """Ensure faulty disks abort the opcode or at least warn."""
8107     try:
8108       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8109                                   node_name, True):
8110         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8111                                    " node '%s'" % (instance.name, node_name),
8112                                    errors.ECODE_STATE)
8113     except errors.OpPrereqError, err:
8114       if self.op.ignore_consistency:
8115         self.proc.LogWarning(str(err.args[0]))
8116       else:
8117         raise
8118
8119   def CheckPrereq(self):
8120     """Check prerequisites.
8121
8122     """
8123     # Check whether any instance on this node has faulty disks
8124     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8125       if not inst.admin_up:
8126         continue
8127       check_nodes = set(inst.all_nodes)
8128       check_nodes.discard(self.op.node_name)
8129       for inst_node_name in check_nodes:
8130         self._CheckFaultyDisks(inst, inst_node_name)
8131
8132   def Exec(self, feedback_fn):
8133     feedback_fn("Repairing storage unit '%s' on %s ..." %
8134                 (self.op.name, self.op.node_name))
8135
8136     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8137     result = self.rpc.call_storage_execute(self.op.node_name,
8138                                            self.op.storage_type, st_args,
8139                                            self.op.name,
8140                                            constants.SO_FIX_CONSISTENCY)
8141     result.Raise("Failed to repair storage unit '%s' on %s" %
8142                  (self.op.name, self.op.node_name))
8143
8144
8145 class LUNodeEvacuationStrategy(NoHooksLU):
8146   """Computes the node evacuation strategy.
8147
8148   """
8149   _OP_PARAMS = [
8150     ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8151     ("remote_node", None, _TMaybeString),
8152     ("iallocator", None, _TMaybeString),
8153     ]
8154   REQ_BGL = False
8155
8156   def CheckArguments(self):
8157     if self.op.remote_node is not None and self.op.iallocator is not None:
8158       raise errors.OpPrereqError("Give either the iallocator or the new"
8159                                  " secondary, not both", errors.ECODE_INVAL)
8160
8161   def ExpandNames(self):
8162     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8163     self.needed_locks = locks = {}
8164     if self.op.remote_node is None:
8165       locks[locking.LEVEL_NODE] = locking.ALL_SET
8166     else:
8167       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8168       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8169
8170   def Exec(self, feedback_fn):
8171     if self.op.remote_node is not None:
8172       instances = []
8173       for node in self.op.nodes:
8174         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8175       result = []
8176       for i in instances:
8177         if i.primary_node == self.op.remote_node:
8178           raise errors.OpPrereqError("Node %s is the primary node of"
8179                                      " instance %s, cannot use it as"
8180                                      " secondary" %
8181                                      (self.op.remote_node, i.name),
8182                                      errors.ECODE_INVAL)
8183         result.append([i.name, self.op.remote_node])
8184     else:
8185       ial = IAllocator(self.cfg, self.rpc,
8186                        mode=constants.IALLOCATOR_MODE_MEVAC,
8187                        evac_nodes=self.op.nodes)
8188       ial.Run(self.op.iallocator, validate=True)
8189       if not ial.success:
8190         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8191                                  errors.ECODE_NORES)
8192       result = ial.result
8193     return result
8194
8195
8196 class LUGrowDisk(LogicalUnit):
8197   """Grow a disk of an instance.
8198
8199   """
8200   HPATH = "disk-grow"
8201   HTYPE = constants.HTYPE_INSTANCE
8202   _OP_PARAMS = [
8203     _PInstanceName,
8204     ("disk", _NoDefault, _TInt),
8205     ("amount", _NoDefault, _TInt),
8206     ("wait_for_sync", True, _TBool),
8207     ]
8208   REQ_BGL = False
8209
8210   def ExpandNames(self):
8211     self._ExpandAndLockInstance()
8212     self.needed_locks[locking.LEVEL_NODE] = []
8213     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8214
8215   def DeclareLocks(self, level):
8216     if level == locking.LEVEL_NODE:
8217       self._LockInstancesNodes()
8218
8219   def BuildHooksEnv(self):
8220     """Build hooks env.
8221
8222     This runs on the master, the primary and all the secondaries.
8223
8224     """
8225     env = {
8226       "DISK": self.op.disk,
8227       "AMOUNT": self.op.amount,
8228       }
8229     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8230     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8231     return env, nl, nl
8232
8233   def CheckPrereq(self):
8234     """Check prerequisites.
8235
8236     This checks that the instance is in the cluster.
8237
8238     """
8239     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8240     assert instance is not None, \
8241       "Cannot retrieve locked instance %s" % self.op.instance_name
8242     nodenames = list(instance.all_nodes)
8243     for node in nodenames:
8244       _CheckNodeOnline(self, node)
8245
8246     self.instance = instance
8247
8248     if instance.disk_template not in constants.DTS_GROWABLE:
8249       raise errors.OpPrereqError("Instance's disk layout does not support"
8250                                  " growing.", errors.ECODE_INVAL)
8251
8252     self.disk = instance.FindDisk(self.op.disk)
8253
8254     if instance.disk_template != constants.DT_FILE:
8255       # TODO: check the free disk space for file, when that feature will be
8256       # supported
8257       _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8258
8259   def Exec(self, feedback_fn):
8260     """Execute disk grow.
8261
8262     """
8263     instance = self.instance
8264     disk = self.disk
8265
8266     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8267     if not disks_ok:
8268       raise errors.OpExecError("Cannot activate block device to grow")
8269
8270     for node in instance.all_nodes:
8271       self.cfg.SetDiskID(disk, node)
8272       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8273       result.Raise("Grow request failed to node %s" % node)
8274
8275       # TODO: Rewrite code to work properly
8276       # DRBD goes into sync mode for a short amount of time after executing the
8277       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8278       # calling "resize" in sync mode fails. Sleeping for a short amount of
8279       # time is a work-around.
8280       time.sleep(5)
8281
8282     disk.RecordGrow(self.op.amount)
8283     self.cfg.Update(instance, feedback_fn)
8284     if self.op.wait_for_sync:
8285       disk_abort = not _WaitForSync(self, instance, disks=[disk])
8286       if disk_abort:
8287         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8288                              " status.\nPlease check the instance.")
8289       if not instance.admin_up:
8290         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8291     elif not instance.admin_up:
8292       self.proc.LogWarning("Not shutting down the disk even if the instance is"
8293                            " not supposed to be running because no wait for"
8294                            " sync mode was requested.")
8295
8296
8297 class LUQueryInstanceData(NoHooksLU):
8298   """Query runtime instance data.
8299
8300   """
8301   _OP_PARAMS = [
8302     ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8303     ("static", False, _TBool),
8304     ]
8305   REQ_BGL = False
8306
8307   def ExpandNames(self):
8308     self.needed_locks = {}
8309     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8310
8311     if self.op.instances:
8312       self.wanted_names = []
8313       for name in self.op.instances:
8314         full_name = _ExpandInstanceName(self.cfg, name)
8315         self.wanted_names.append(full_name)
8316       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8317     else:
8318       self.wanted_names = None
8319       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8320
8321     self.needed_locks[locking.LEVEL_NODE] = []
8322     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8323
8324   def DeclareLocks(self, level):
8325     if level == locking.LEVEL_NODE:
8326       self._LockInstancesNodes()
8327
8328   def CheckPrereq(self):
8329     """Check prerequisites.
8330
8331     This only checks the optional instance list against the existing names.
8332
8333     """
8334     if self.wanted_names is None:
8335       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8336
8337     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8338                              in self.wanted_names]
8339
8340   def _ComputeBlockdevStatus(self, node, instance_name, dev):
8341     """Returns the status of a block device
8342
8343     """
8344     if self.op.static or not node:
8345       return None
8346
8347     self.cfg.SetDiskID(dev, node)
8348
8349     result = self.rpc.call_blockdev_find(node, dev)
8350     if result.offline:
8351       return None
8352
8353     result.Raise("Can't compute disk status for %s" % instance_name)
8354
8355     status = result.payload
8356     if status is None:
8357       return None
8358
8359     return (status.dev_path, status.major, status.minor,
8360             status.sync_percent, status.estimated_time,
8361             status.is_degraded, status.ldisk_status)
8362
8363   def _ComputeDiskStatus(self, instance, snode, dev):
8364     """Compute block device status.
8365
8366     """
8367     if dev.dev_type in constants.LDS_DRBD:
8368       # we change the snode then (otherwise we use the one passed in)
8369       if dev.logical_id[0] == instance.primary_node:
8370         snode = dev.logical_id[1]
8371       else:
8372         snode = dev.logical_id[0]
8373
8374     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8375                                               instance.name, dev)
8376     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8377
8378     if dev.children:
8379       dev_children = [self._ComputeDiskStatus(instance, snode, child)
8380                       for child in dev.children]
8381     else:
8382       dev_children = []
8383
8384     data = {
8385       "iv_name": dev.iv_name,
8386       "dev_type": dev.dev_type,
8387       "logical_id": dev.logical_id,
8388       "physical_id": dev.physical_id,
8389       "pstatus": dev_pstatus,
8390       "sstatus": dev_sstatus,
8391       "children": dev_children,
8392       "mode": dev.mode,
8393       "size": dev.size,
8394       }
8395
8396     return data
8397
8398   def Exec(self, feedback_fn):
8399     """Gather and return data"""
8400     result = {}
8401
8402     cluster = self.cfg.GetClusterInfo()
8403
8404     for instance in self.wanted_instances:
8405       if not self.op.static:
8406         remote_info = self.rpc.call_instance_info(instance.primary_node,
8407                                                   instance.name,
8408                                                   instance.hypervisor)
8409         remote_info.Raise("Error checking node %s" % instance.primary_node)
8410         remote_info = remote_info.payload
8411         if remote_info and "state" in remote_info:
8412           remote_state = "up"
8413         else:
8414           remote_state = "down"
8415       else:
8416         remote_state = None
8417       if instance.admin_up:
8418         config_state = "up"
8419       else:
8420         config_state = "down"
8421
8422       disks = [self._ComputeDiskStatus(instance, None, device)
8423                for device in instance.disks]
8424
8425       idict = {
8426         "name": instance.name,
8427         "config_state": config_state,
8428         "run_state": remote_state,
8429         "pnode": instance.primary_node,
8430         "snodes": instance.secondary_nodes,
8431         "os": instance.os,
8432         # this happens to be the same format used for hooks
8433         "nics": _NICListToTuple(self, instance.nics),
8434         "disk_template": instance.disk_template,
8435         "disks": disks,
8436         "hypervisor": instance.hypervisor,
8437         "network_port": instance.network_port,
8438         "hv_instance": instance.hvparams,
8439         "hv_actual": cluster.FillHV(instance, skip_globals=True),
8440         "be_instance": instance.beparams,
8441         "be_actual": cluster.FillBE(instance),
8442         "os_instance": instance.osparams,
8443         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8444         "serial_no": instance.serial_no,
8445         "mtime": instance.mtime,
8446         "ctime": instance.ctime,
8447         "uuid": instance.uuid,
8448         }
8449
8450       result[instance.name] = idict
8451
8452     return result
8453
8454
8455 class LUSetInstanceParams(LogicalUnit):
8456   """Modifies an instances's parameters.
8457
8458   """
8459   HPATH = "instance-modify"
8460   HTYPE = constants.HTYPE_INSTANCE
8461   _OP_PARAMS = [
8462     _PInstanceName,
8463     ("nics", _EmptyList, _TList),
8464     ("disks", _EmptyList, _TList),
8465     ("beparams", _EmptyDict, _TDict),
8466     ("hvparams", _EmptyDict, _TDict),
8467     ("disk_template", None, _TMaybeString),
8468     ("remote_node", None, _TMaybeString),
8469     ("os_name", None, _TMaybeString),
8470     ("force_variant", False, _TBool),
8471     ("osparams", None, _TOr(_TDict, _TNone)),
8472     _PForce,
8473     ]
8474   REQ_BGL = False
8475
8476   def CheckArguments(self):
8477     if not (self.op.nics or self.op.disks or self.op.disk_template or
8478             self.op.hvparams or self.op.beparams or self.op.os_name):
8479       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8480
8481     if self.op.hvparams:
8482       _CheckGlobalHvParams(self.op.hvparams)
8483
8484     # Disk validation
8485     disk_addremove = 0
8486     for disk_op, disk_dict in self.op.disks:
8487       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8488       if disk_op == constants.DDM_REMOVE:
8489         disk_addremove += 1
8490         continue
8491       elif disk_op == constants.DDM_ADD:
8492         disk_addremove += 1
8493       else:
8494         if not isinstance(disk_op, int):
8495           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8496         if not isinstance(disk_dict, dict):
8497           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8498           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8499
8500       if disk_op == constants.DDM_ADD:
8501         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8502         if mode not in constants.DISK_ACCESS_SET:
8503           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8504                                      errors.ECODE_INVAL)
8505         size = disk_dict.get('size', None)
8506         if size is None:
8507           raise errors.OpPrereqError("Required disk parameter size missing",
8508                                      errors.ECODE_INVAL)
8509         try:
8510           size = int(size)
8511         except (TypeError, ValueError), err:
8512           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8513                                      str(err), errors.ECODE_INVAL)
8514         disk_dict['size'] = size
8515       else:
8516         # modification of disk
8517         if 'size' in disk_dict:
8518           raise errors.OpPrereqError("Disk size change not possible, use"
8519                                      " grow-disk", errors.ECODE_INVAL)
8520
8521     if disk_addremove > 1:
8522       raise errors.OpPrereqError("Only one disk add or remove operation"
8523                                  " supported at a time", errors.ECODE_INVAL)
8524
8525     if self.op.disks and self.op.disk_template is not None:
8526       raise errors.OpPrereqError("Disk template conversion and other disk"
8527                                  " changes not supported at the same time",
8528                                  errors.ECODE_INVAL)
8529
8530     if self.op.disk_template:
8531       _CheckDiskTemplate(self.op.disk_template)
8532       if (self.op.disk_template in constants.DTS_NET_MIRROR and
8533           self.op.remote_node is None):
8534         raise errors.OpPrereqError("Changing the disk template to a mirrored"
8535                                    " one requires specifying a secondary node",
8536                                    errors.ECODE_INVAL)
8537
8538     # NIC validation
8539     nic_addremove = 0
8540     for nic_op, nic_dict in self.op.nics:
8541       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8542       if nic_op == constants.DDM_REMOVE:
8543         nic_addremove += 1
8544         continue
8545       elif nic_op == constants.DDM_ADD:
8546         nic_addremove += 1
8547       else:
8548         if not isinstance(nic_op, int):
8549           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8550         if not isinstance(nic_dict, dict):
8551           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8552           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8553
8554       # nic_dict should be a dict
8555       nic_ip = nic_dict.get('ip', None)
8556       if nic_ip is not None:
8557         if nic_ip.lower() == constants.VALUE_NONE:
8558           nic_dict['ip'] = None
8559         else:
8560           if not utils.IsValidIP4(nic_ip):
8561             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8562                                        errors.ECODE_INVAL)
8563
8564       nic_bridge = nic_dict.get('bridge', None)
8565       nic_link = nic_dict.get('link', None)
8566       if nic_bridge and nic_link:
8567         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8568                                    " at the same time", errors.ECODE_INVAL)
8569       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8570         nic_dict['bridge'] = None
8571       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8572         nic_dict['link'] = None
8573
8574       if nic_op == constants.DDM_ADD:
8575         nic_mac = nic_dict.get('mac', None)
8576         if nic_mac is None:
8577           nic_dict['mac'] = constants.VALUE_AUTO
8578
8579       if 'mac' in nic_dict:
8580         nic_mac = nic_dict['mac']
8581         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8582           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8583
8584         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8585           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8586                                      " modifying an existing nic",
8587                                      errors.ECODE_INVAL)
8588
8589     if nic_addremove > 1:
8590       raise errors.OpPrereqError("Only one NIC add or remove operation"
8591                                  " supported at a time", errors.ECODE_INVAL)
8592
8593   def ExpandNames(self):
8594     self._ExpandAndLockInstance()
8595     self.needed_locks[locking.LEVEL_NODE] = []
8596     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8597
8598   def DeclareLocks(self, level):
8599     if level == locking.LEVEL_NODE:
8600       self._LockInstancesNodes()
8601       if self.op.disk_template and self.op.remote_node:
8602         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8603         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8604
8605   def BuildHooksEnv(self):
8606     """Build hooks env.
8607
8608     This runs on the master, primary and secondaries.
8609
8610     """
8611     args = dict()
8612     if constants.BE_MEMORY in self.be_new:
8613       args['memory'] = self.be_new[constants.BE_MEMORY]
8614     if constants.BE_VCPUS in self.be_new:
8615       args['vcpus'] = self.be_new[constants.BE_VCPUS]
8616     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8617     # information at all.
8618     if self.op.nics:
8619       args['nics'] = []
8620       nic_override = dict(self.op.nics)
8621       for idx, nic in enumerate(self.instance.nics):
8622         if idx in nic_override:
8623           this_nic_override = nic_override[idx]
8624         else:
8625           this_nic_override = {}
8626         if 'ip' in this_nic_override:
8627           ip = this_nic_override['ip']
8628         else:
8629           ip = nic.ip
8630         if 'mac' in this_nic_override:
8631           mac = this_nic_override['mac']
8632         else:
8633           mac = nic.mac
8634         if idx in self.nic_pnew:
8635           nicparams = self.nic_pnew[idx]
8636         else:
8637           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8638         mode = nicparams[constants.NIC_MODE]
8639         link = nicparams[constants.NIC_LINK]
8640         args['nics'].append((ip, mac, mode, link))
8641       if constants.DDM_ADD in nic_override:
8642         ip = nic_override[constants.DDM_ADD].get('ip', None)
8643         mac = nic_override[constants.DDM_ADD]['mac']
8644         nicparams = self.nic_pnew[constants.DDM_ADD]
8645         mode = nicparams[constants.NIC_MODE]
8646         link = nicparams[constants.NIC_LINK]
8647         args['nics'].append((ip, mac, mode, link))
8648       elif constants.DDM_REMOVE in nic_override:
8649         del args['nics'][-1]
8650
8651     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8652     if self.op.disk_template:
8653       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8654     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8655     return env, nl, nl
8656
8657   def CheckPrereq(self):
8658     """Check prerequisites.
8659
8660     This only checks the instance list against the existing names.
8661
8662     """
8663     # checking the new params on the primary/secondary nodes
8664
8665     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8666     cluster = self.cluster = self.cfg.GetClusterInfo()
8667     assert self.instance is not None, \
8668       "Cannot retrieve locked instance %s" % self.op.instance_name
8669     pnode = instance.primary_node
8670     nodelist = list(instance.all_nodes)
8671
8672     # OS change
8673     if self.op.os_name and not self.op.force:
8674       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8675                       self.op.force_variant)
8676       instance_os = self.op.os_name
8677     else:
8678       instance_os = instance.os
8679
8680     if self.op.disk_template:
8681       if instance.disk_template == self.op.disk_template:
8682         raise errors.OpPrereqError("Instance already has disk template %s" %
8683                                    instance.disk_template, errors.ECODE_INVAL)
8684
8685       if (instance.disk_template,
8686           self.op.disk_template) not in self._DISK_CONVERSIONS:
8687         raise errors.OpPrereqError("Unsupported disk template conversion from"
8688                                    " %s to %s" % (instance.disk_template,
8689                                                   self.op.disk_template),
8690                                    errors.ECODE_INVAL)
8691       _CheckInstanceDown(self, instance, "cannot change disk template")
8692       if self.op.disk_template in constants.DTS_NET_MIRROR:
8693         _CheckNodeOnline(self, self.op.remote_node)
8694         _CheckNodeNotDrained(self, self.op.remote_node)
8695         disks = [{"size": d.size} for d in instance.disks]
8696         required = _ComputeDiskSize(self.op.disk_template, disks)
8697         _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8698
8699     # hvparams processing
8700     if self.op.hvparams:
8701       hv_type = instance.hypervisor
8702       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8703       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8704       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8705
8706       # local check
8707       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8708       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8709       self.hv_new = hv_new # the new actual values
8710       self.hv_inst = i_hvdict # the new dict (without defaults)
8711     else:
8712       self.hv_new = self.hv_inst = {}
8713
8714     # beparams processing
8715     if self.op.beparams:
8716       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8717                                    use_none=True)
8718       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8719       be_new = cluster.SimpleFillBE(i_bedict)
8720       self.be_new = be_new # the new actual values
8721       self.be_inst = i_bedict # the new dict (without defaults)
8722     else:
8723       self.be_new = self.be_inst = {}
8724
8725     # osparams processing
8726     if self.op.osparams:
8727       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8728       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8729       self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8730       self.os_inst = i_osdict # the new dict (without defaults)
8731     else:
8732       self.os_new = self.os_inst = {}
8733
8734     self.warn = []
8735
8736     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8737       mem_check_list = [pnode]
8738       if be_new[constants.BE_AUTO_BALANCE]:
8739         # either we changed auto_balance to yes or it was from before
8740         mem_check_list.extend(instance.secondary_nodes)
8741       instance_info = self.rpc.call_instance_info(pnode, instance.name,
8742                                                   instance.hypervisor)
8743       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8744                                          instance.hypervisor)
8745       pninfo = nodeinfo[pnode]
8746       msg = pninfo.fail_msg
8747       if msg:
8748         # Assume the primary node is unreachable and go ahead
8749         self.warn.append("Can't get info from primary node %s: %s" %
8750                          (pnode,  msg))
8751       elif not isinstance(pninfo.payload.get('memory_free', None), int):
8752         self.warn.append("Node data from primary node %s doesn't contain"
8753                          " free memory information" % pnode)
8754       elif instance_info.fail_msg:
8755         self.warn.append("Can't get instance runtime information: %s" %
8756                         instance_info.fail_msg)
8757       else:
8758         if instance_info.payload:
8759           current_mem = int(instance_info.payload['memory'])
8760         else:
8761           # Assume instance not running
8762           # (there is a slight race condition here, but it's not very probable,
8763           # and we have no other way to check)
8764           current_mem = 0
8765         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8766                     pninfo.payload['memory_free'])
8767         if miss_mem > 0:
8768           raise errors.OpPrereqError("This change will prevent the instance"
8769                                      " from starting, due to %d MB of memory"
8770                                      " missing on its primary node" % miss_mem,
8771                                      errors.ECODE_NORES)
8772
8773       if be_new[constants.BE_AUTO_BALANCE]:
8774         for node, nres in nodeinfo.items():
8775           if node not in instance.secondary_nodes:
8776             continue
8777           msg = nres.fail_msg
8778           if msg:
8779             self.warn.append("Can't get info from secondary node %s: %s" %
8780                              (node, msg))
8781           elif not isinstance(nres.payload.get('memory_free', None), int):
8782             self.warn.append("Secondary node %s didn't return free"
8783                              " memory information" % node)
8784           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8785             self.warn.append("Not enough memory to failover instance to"
8786                              " secondary node %s" % node)
8787
8788     # NIC processing
8789     self.nic_pnew = {}
8790     self.nic_pinst = {}
8791     for nic_op, nic_dict in self.op.nics:
8792       if nic_op == constants.DDM_REMOVE:
8793         if not instance.nics:
8794           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8795                                      errors.ECODE_INVAL)
8796         continue
8797       if nic_op != constants.DDM_ADD:
8798         # an existing nic
8799         if not instance.nics:
8800           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8801                                      " no NICs" % nic_op,
8802                                      errors.ECODE_INVAL)
8803         if nic_op < 0 or nic_op >= len(instance.nics):
8804           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8805                                      " are 0 to %d" %
8806                                      (nic_op, len(instance.nics) - 1),
8807                                      errors.ECODE_INVAL)
8808         old_nic_params = instance.nics[nic_op].nicparams
8809         old_nic_ip = instance.nics[nic_op].ip
8810       else:
8811         old_nic_params = {}
8812         old_nic_ip = None
8813
8814       update_params_dict = dict([(key, nic_dict[key])
8815                                  for key in constants.NICS_PARAMETERS
8816                                  if key in nic_dict])
8817
8818       if 'bridge' in nic_dict:
8819         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8820
8821       new_nic_params = _GetUpdatedParams(old_nic_params,
8822                                          update_params_dict)
8823       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8824       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8825       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8826       self.nic_pinst[nic_op] = new_nic_params
8827       self.nic_pnew[nic_op] = new_filled_nic_params
8828       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8829
8830       if new_nic_mode == constants.NIC_MODE_BRIDGED:
8831         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8832         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8833         if msg:
8834           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8835           if self.op.force:
8836             self.warn.append(msg)
8837           else:
8838             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8839       if new_nic_mode == constants.NIC_MODE_ROUTED:
8840         if 'ip' in nic_dict:
8841           nic_ip = nic_dict['ip']
8842         else:
8843           nic_ip = old_nic_ip
8844         if nic_ip is None:
8845           raise errors.OpPrereqError('Cannot set the nic ip to None'
8846                                      ' on a routed nic', errors.ECODE_INVAL)
8847       if 'mac' in nic_dict:
8848         nic_mac = nic_dict['mac']
8849         if nic_mac is None:
8850           raise errors.OpPrereqError('Cannot set the nic mac to None',
8851                                      errors.ECODE_INVAL)
8852         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8853           # otherwise generate the mac
8854           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8855         else:
8856           # or validate/reserve the current one
8857           try:
8858             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8859           except errors.ReservationError:
8860             raise errors.OpPrereqError("MAC address %s already in use"
8861                                        " in cluster" % nic_mac,
8862                                        errors.ECODE_NOTUNIQUE)
8863
8864     # DISK processing
8865     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8866       raise errors.OpPrereqError("Disk operations not supported for"
8867                                  " diskless instances",
8868                                  errors.ECODE_INVAL)
8869     for disk_op, _ in self.op.disks:
8870       if disk_op == constants.DDM_REMOVE:
8871         if len(instance.disks) == 1:
8872           raise errors.OpPrereqError("Cannot remove the last disk of"
8873                                      " an instance", errors.ECODE_INVAL)
8874         _CheckInstanceDown(self, instance, "cannot remove disks")
8875
8876       if (disk_op == constants.DDM_ADD and
8877           len(instance.nics) >= constants.MAX_DISKS):
8878         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8879                                    " add more" % constants.MAX_DISKS,
8880                                    errors.ECODE_STATE)
8881       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8882         # an existing disk
8883         if disk_op < 0 or disk_op >= len(instance.disks):
8884           raise errors.OpPrereqError("Invalid disk index %s, valid values"
8885                                      " are 0 to %d" %
8886                                      (disk_op, len(instance.disks)),
8887                                      errors.ECODE_INVAL)
8888
8889     return
8890
8891   def _ConvertPlainToDrbd(self, feedback_fn):
8892     """Converts an instance from plain to drbd.
8893
8894     """
8895     feedback_fn("Converting template to drbd")
8896     instance = self.instance
8897     pnode = instance.primary_node
8898     snode = self.op.remote_node
8899
8900     # create a fake disk info for _GenerateDiskTemplate
8901     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8902     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8903                                       instance.name, pnode, [snode],
8904                                       disk_info, None, None, 0)
8905     info = _GetInstanceInfoText(instance)
8906     feedback_fn("Creating aditional volumes...")
8907     # first, create the missing data and meta devices
8908     for disk in new_disks:
8909       # unfortunately this is... not too nice
8910       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8911                             info, True)
8912       for child in disk.children:
8913         _CreateSingleBlockDev(self, snode, instance, child, info, True)
8914     # at this stage, all new LVs have been created, we can rename the
8915     # old ones
8916     feedback_fn("Renaming original volumes...")
8917     rename_list = [(o, n.children[0].logical_id)
8918                    for (o, n) in zip(instance.disks, new_disks)]
8919     result = self.rpc.call_blockdev_rename(pnode, rename_list)
8920     result.Raise("Failed to rename original LVs")
8921
8922     feedback_fn("Initializing DRBD devices...")
8923     # all child devices are in place, we can now create the DRBD devices
8924     for disk in new_disks:
8925       for node in [pnode, snode]:
8926         f_create = node == pnode
8927         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8928
8929     # at this point, the instance has been modified
8930     instance.disk_template = constants.DT_DRBD8
8931     instance.disks = new_disks
8932     self.cfg.Update(instance, feedback_fn)
8933
8934     # disks are created, waiting for sync
8935     disk_abort = not _WaitForSync(self, instance)
8936     if disk_abort:
8937       raise errors.OpExecError("There are some degraded disks for"
8938                                " this instance, please cleanup manually")
8939
8940   def _ConvertDrbdToPlain(self, feedback_fn):
8941     """Converts an instance from drbd to plain.
8942
8943     """
8944     instance = self.instance
8945     assert len(instance.secondary_nodes) == 1
8946     pnode = instance.primary_node
8947     snode = instance.secondary_nodes[0]
8948     feedback_fn("Converting template to plain")
8949
8950     old_disks = instance.disks
8951     new_disks = [d.children[0] for d in old_disks]
8952
8953     # copy over size and mode
8954     for parent, child in zip(old_disks, new_disks):
8955       child.size = parent.size
8956       child.mode = parent.mode
8957
8958     # update instance structure
8959     instance.disks = new_disks
8960     instance.disk_template = constants.DT_PLAIN
8961     self.cfg.Update(instance, feedback_fn)
8962
8963     feedback_fn("Removing volumes on the secondary node...")
8964     for disk in old_disks:
8965       self.cfg.SetDiskID(disk, snode)
8966       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8967       if msg:
8968         self.LogWarning("Could not remove block device %s on node %s,"
8969                         " continuing anyway: %s", disk.iv_name, snode, msg)
8970
8971     feedback_fn("Removing unneeded volumes on the primary node...")
8972     for idx, disk in enumerate(old_disks):
8973       meta = disk.children[1]
8974       self.cfg.SetDiskID(meta, pnode)
8975       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8976       if msg:
8977         self.LogWarning("Could not remove metadata for disk %d on node %s,"
8978                         " continuing anyway: %s", idx, pnode, msg)
8979
8980
8981   def Exec(self, feedback_fn):
8982     """Modifies an instance.
8983
8984     All parameters take effect only at the next restart of the instance.
8985
8986     """
8987     # Process here the warnings from CheckPrereq, as we don't have a
8988     # feedback_fn there.
8989     for warn in self.warn:
8990       feedback_fn("WARNING: %s" % warn)
8991
8992     result = []
8993     instance = self.instance
8994     # disk changes
8995     for disk_op, disk_dict in self.op.disks:
8996       if disk_op == constants.DDM_REMOVE:
8997         # remove the last disk
8998         device = instance.disks.pop()
8999         device_idx = len(instance.disks)
9000         for node, disk in device.ComputeNodeTree(instance.primary_node):
9001           self.cfg.SetDiskID(disk, node)
9002           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9003           if msg:
9004             self.LogWarning("Could not remove disk/%d on node %s: %s,"
9005                             " continuing anyway", device_idx, node, msg)
9006         result.append(("disk/%d" % device_idx, "remove"))
9007       elif disk_op == constants.DDM_ADD:
9008         # add a new disk
9009         if instance.disk_template == constants.DT_FILE:
9010           file_driver, file_path = instance.disks[0].logical_id
9011           file_path = os.path.dirname(file_path)
9012         else:
9013           file_driver = file_path = None
9014         disk_idx_base = len(instance.disks)
9015         new_disk = _GenerateDiskTemplate(self,
9016                                          instance.disk_template,
9017                                          instance.name, instance.primary_node,
9018                                          instance.secondary_nodes,
9019                                          [disk_dict],
9020                                          file_path,
9021                                          file_driver,
9022                                          disk_idx_base)[0]
9023         instance.disks.append(new_disk)
9024         info = _GetInstanceInfoText(instance)
9025
9026         logging.info("Creating volume %s for instance %s",
9027                      new_disk.iv_name, instance.name)
9028         # Note: this needs to be kept in sync with _CreateDisks
9029         #HARDCODE
9030         for node in instance.all_nodes:
9031           f_create = node == instance.primary_node
9032           try:
9033             _CreateBlockDev(self, node, instance, new_disk,
9034                             f_create, info, f_create)
9035           except errors.OpExecError, err:
9036             self.LogWarning("Failed to create volume %s (%s) on"
9037                             " node %s: %s",
9038                             new_disk.iv_name, new_disk, node, err)
9039         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9040                        (new_disk.size, new_disk.mode)))
9041       else:
9042         # change a given disk
9043         instance.disks[disk_op].mode = disk_dict['mode']
9044         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9045
9046     if self.op.disk_template:
9047       r_shut = _ShutdownInstanceDisks(self, instance)
9048       if not r_shut:
9049         raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9050                                  " proceed with disk template conversion")
9051       mode = (instance.disk_template, self.op.disk_template)
9052       try:
9053         self._DISK_CONVERSIONS[mode](self, feedback_fn)
9054       except:
9055         self.cfg.ReleaseDRBDMinors(instance.name)
9056         raise
9057       result.append(("disk_template", self.op.disk_template))
9058
9059     # NIC changes
9060     for nic_op, nic_dict in self.op.nics:
9061       if nic_op == constants.DDM_REMOVE:
9062         # remove the last nic
9063         del instance.nics[-1]
9064         result.append(("nic.%d" % len(instance.nics), "remove"))
9065       elif nic_op == constants.DDM_ADD:
9066         # mac and bridge should be set, by now
9067         mac = nic_dict['mac']
9068         ip = nic_dict.get('ip', None)
9069         nicparams = self.nic_pinst[constants.DDM_ADD]
9070         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9071         instance.nics.append(new_nic)
9072         result.append(("nic.%d" % (len(instance.nics) - 1),
9073                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
9074                        (new_nic.mac, new_nic.ip,
9075                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9076                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9077                        )))
9078       else:
9079         for key in 'mac', 'ip':
9080           if key in nic_dict:
9081             setattr(instance.nics[nic_op], key, nic_dict[key])
9082         if nic_op in self.nic_pinst:
9083           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9084         for key, val in nic_dict.iteritems():
9085           result.append(("nic.%s/%d" % (key, nic_op), val))
9086
9087     # hvparams changes
9088     if self.op.hvparams:
9089       instance.hvparams = self.hv_inst
9090       for key, val in self.op.hvparams.iteritems():
9091         result.append(("hv/%s" % key, val))
9092
9093     # beparams changes
9094     if self.op.beparams:
9095       instance.beparams = self.be_inst
9096       for key, val in self.op.beparams.iteritems():
9097         result.append(("be/%s" % key, val))
9098
9099     # OS change
9100     if self.op.os_name:
9101       instance.os = self.op.os_name
9102
9103     # osparams changes
9104     if self.op.osparams:
9105       instance.osparams = self.os_inst
9106       for key, val in self.op.osparams.iteritems():
9107         result.append(("os/%s" % key, val))
9108
9109     self.cfg.Update(instance, feedback_fn)
9110
9111     return result
9112
9113   _DISK_CONVERSIONS = {
9114     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9115     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9116     }
9117
9118
9119 class LUQueryExports(NoHooksLU):
9120   """Query the exports list
9121
9122   """
9123   _OP_PARAMS = [
9124     ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9125     ("use_locking", False, _TBool),
9126     ]
9127   REQ_BGL = False
9128
9129   def ExpandNames(self):
9130     self.needed_locks = {}
9131     self.share_locks[locking.LEVEL_NODE] = 1
9132     if not self.op.nodes:
9133       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9134     else:
9135       self.needed_locks[locking.LEVEL_NODE] = \
9136         _GetWantedNodes(self, self.op.nodes)
9137
9138   def Exec(self, feedback_fn):
9139     """Compute the list of all the exported system images.
9140
9141     @rtype: dict
9142     @return: a dictionary with the structure node->(export-list)
9143         where export-list is a list of the instances exported on
9144         that node.
9145
9146     """
9147     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9148     rpcresult = self.rpc.call_export_list(self.nodes)
9149     result = {}
9150     for node in rpcresult:
9151       if rpcresult[node].fail_msg:
9152         result[node] = False
9153       else:
9154         result[node] = rpcresult[node].payload
9155
9156     return result
9157
9158
9159 class LUPrepareExport(NoHooksLU):
9160   """Prepares an instance for an export and returns useful information.
9161
9162   """
9163   _OP_PARAMS = [
9164     _PInstanceName,
9165     ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9166     ]
9167   REQ_BGL = False
9168
9169   def ExpandNames(self):
9170     self._ExpandAndLockInstance()
9171
9172   def CheckPrereq(self):
9173     """Check prerequisites.
9174
9175     """
9176     instance_name = self.op.instance_name
9177
9178     self.instance = self.cfg.GetInstanceInfo(instance_name)
9179     assert self.instance is not None, \
9180           "Cannot retrieve locked instance %s" % self.op.instance_name
9181     _CheckNodeOnline(self, self.instance.primary_node)
9182
9183     self._cds = _GetClusterDomainSecret()
9184
9185   def Exec(self, feedback_fn):
9186     """Prepares an instance for an export.
9187
9188     """
9189     instance = self.instance
9190
9191     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9192       salt = utils.GenerateSecret(8)
9193
9194       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9195       result = self.rpc.call_x509_cert_create(instance.primary_node,
9196                                               constants.RIE_CERT_VALIDITY)
9197       result.Raise("Can't create X509 key and certificate on %s" % result.node)
9198
9199       (name, cert_pem) = result.payload
9200
9201       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9202                                              cert_pem)
9203
9204       return {
9205         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9206         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9207                           salt),
9208         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9209         }
9210
9211     return None
9212
9213
9214 class LUExportInstance(LogicalUnit):
9215   """Export an instance to an image in the cluster.
9216
9217   """
9218   HPATH = "instance-export"
9219   HTYPE = constants.HTYPE_INSTANCE
9220   _OP_PARAMS = [
9221     _PInstanceName,
9222     ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9223     ("shutdown", True, _TBool),
9224     _PShutdownTimeout,
9225     ("remove_instance", False, _TBool),
9226     ("ignore_remove_failures", False, _TBool),
9227     ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9228     ("x509_key_name", None, _TOr(_TList, _TNone)),
9229     ("destination_x509_ca", None, _TMaybeString),
9230     ]
9231   REQ_BGL = False
9232
9233   def CheckArguments(self):
9234     """Check the arguments.
9235
9236     """
9237     self.x509_key_name = self.op.x509_key_name
9238     self.dest_x509_ca_pem = self.op.destination_x509_ca
9239
9240     if self.op.remove_instance and not self.op.shutdown:
9241       raise errors.OpPrereqError("Can not remove instance without shutting it"
9242                                  " down before")
9243
9244     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9245       if not self.x509_key_name:
9246         raise errors.OpPrereqError("Missing X509 key name for encryption",
9247                                    errors.ECODE_INVAL)
9248
9249       if not self.dest_x509_ca_pem:
9250         raise errors.OpPrereqError("Missing destination X509 CA",
9251                                    errors.ECODE_INVAL)
9252
9253   def ExpandNames(self):
9254     self._ExpandAndLockInstance()
9255
9256     # Lock all nodes for local exports
9257     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9258       # FIXME: lock only instance primary and destination node
9259       #
9260       # Sad but true, for now we have do lock all nodes, as we don't know where
9261       # the previous export might be, and in this LU we search for it and
9262       # remove it from its current node. In the future we could fix this by:
9263       #  - making a tasklet to search (share-lock all), then create the
9264       #    new one, then one to remove, after
9265       #  - removing the removal operation altogether
9266       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9267
9268   def DeclareLocks(self, level):
9269     """Last minute lock declaration."""
9270     # All nodes are locked anyway, so nothing to do here.
9271
9272   def BuildHooksEnv(self):
9273     """Build hooks env.
9274
9275     This will run on the master, primary node and target node.
9276
9277     """
9278     env = {
9279       "EXPORT_MODE": self.op.mode,
9280       "EXPORT_NODE": self.op.target_node,
9281       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9282       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9283       # TODO: Generic function for boolean env variables
9284       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9285       }
9286
9287     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9288
9289     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9290
9291     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9292       nl.append(self.op.target_node)
9293
9294     return env, nl, nl
9295
9296   def CheckPrereq(self):
9297     """Check prerequisites.
9298
9299     This checks that the instance and node names are valid.
9300
9301     """
9302     instance_name = self.op.instance_name
9303
9304     self.instance = self.cfg.GetInstanceInfo(instance_name)
9305     assert self.instance is not None, \
9306           "Cannot retrieve locked instance %s" % self.op.instance_name
9307     _CheckNodeOnline(self, self.instance.primary_node)
9308
9309     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9310       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9311       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9312       assert self.dst_node is not None
9313
9314       _CheckNodeOnline(self, self.dst_node.name)
9315       _CheckNodeNotDrained(self, self.dst_node.name)
9316
9317       self._cds = None
9318       self.dest_disk_info = None
9319       self.dest_x509_ca = None
9320
9321     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9322       self.dst_node = None
9323
9324       if len(self.op.target_node) != len(self.instance.disks):
9325         raise errors.OpPrereqError(("Received destination information for %s"
9326                                     " disks, but instance %s has %s disks") %
9327                                    (len(self.op.target_node), instance_name,
9328                                     len(self.instance.disks)),
9329                                    errors.ECODE_INVAL)
9330
9331       cds = _GetClusterDomainSecret()
9332
9333       # Check X509 key name
9334       try:
9335         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9336       except (TypeError, ValueError), err:
9337         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9338
9339       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9340         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9341                                    errors.ECODE_INVAL)
9342
9343       # Load and verify CA
9344       try:
9345         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9346       except OpenSSL.crypto.Error, err:
9347         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9348                                    (err, ), errors.ECODE_INVAL)
9349
9350       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9351       if errcode is not None:
9352         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9353                                    (msg, ), errors.ECODE_INVAL)
9354
9355       self.dest_x509_ca = cert
9356
9357       # Verify target information
9358       disk_info = []
9359       for idx, disk_data in enumerate(self.op.target_node):
9360         try:
9361           (host, port, magic) = \
9362             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9363         except errors.GenericError, err:
9364           raise errors.OpPrereqError("Target info for disk %s: %s" %
9365                                      (idx, err), errors.ECODE_INVAL)
9366
9367         disk_info.append((host, port, magic))
9368
9369       assert len(disk_info) == len(self.op.target_node)
9370       self.dest_disk_info = disk_info
9371
9372     else:
9373       raise errors.ProgrammerError("Unhandled export mode %r" %
9374                                    self.op.mode)
9375
9376     # instance disk type verification
9377     # TODO: Implement export support for file-based disks
9378     for disk in self.instance.disks:
9379       if disk.dev_type == constants.LD_FILE:
9380         raise errors.OpPrereqError("Export not supported for instances with"
9381                                    " file-based disks", errors.ECODE_INVAL)
9382
9383   def _CleanupExports(self, feedback_fn):
9384     """Removes exports of current instance from all other nodes.
9385
9386     If an instance in a cluster with nodes A..D was exported to node C, its
9387     exports will be removed from the nodes A, B and D.
9388
9389     """
9390     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9391
9392     nodelist = self.cfg.GetNodeList()
9393     nodelist.remove(self.dst_node.name)
9394
9395     # on one-node clusters nodelist will be empty after the removal
9396     # if we proceed the backup would be removed because OpQueryExports
9397     # substitutes an empty list with the full cluster node list.
9398     iname = self.instance.name
9399     if nodelist:
9400       feedback_fn("Removing old exports for instance %s" % iname)
9401       exportlist = self.rpc.call_export_list(nodelist)
9402       for node in exportlist:
9403         if exportlist[node].fail_msg:
9404           continue
9405         if iname in exportlist[node].payload:
9406           msg = self.rpc.call_export_remove(node, iname).fail_msg
9407           if msg:
9408             self.LogWarning("Could not remove older export for instance %s"
9409                             " on node %s: %s", iname, node, msg)
9410
9411   def Exec(self, feedback_fn):
9412     """Export an instance to an image in the cluster.
9413
9414     """
9415     assert self.op.mode in constants.EXPORT_MODES
9416
9417     instance = self.instance
9418     src_node = instance.primary_node
9419
9420     if self.op.shutdown:
9421       # shutdown the instance, but not the disks
9422       feedback_fn("Shutting down instance %s" % instance.name)
9423       result = self.rpc.call_instance_shutdown(src_node, instance,
9424                                                self.op.shutdown_timeout)
9425       # TODO: Maybe ignore failures if ignore_remove_failures is set
9426       result.Raise("Could not shutdown instance %s on"
9427                    " node %s" % (instance.name, src_node))
9428
9429     # set the disks ID correctly since call_instance_start needs the
9430     # correct drbd minor to create the symlinks
9431     for disk in instance.disks:
9432       self.cfg.SetDiskID(disk, src_node)
9433
9434     activate_disks = (not instance.admin_up)
9435
9436     if activate_disks:
9437       # Activate the instance disks if we'exporting a stopped instance
9438       feedback_fn("Activating disks for %s" % instance.name)
9439       _StartInstanceDisks(self, instance, None)
9440
9441     try:
9442       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9443                                                      instance)
9444
9445       helper.CreateSnapshots()
9446       try:
9447         if (self.op.shutdown and instance.admin_up and
9448             not self.op.remove_instance):
9449           assert not activate_disks
9450           feedback_fn("Starting instance %s" % instance.name)
9451           result = self.rpc.call_instance_start(src_node, instance, None, None)
9452           msg = result.fail_msg
9453           if msg:
9454             feedback_fn("Failed to start instance: %s" % msg)
9455             _ShutdownInstanceDisks(self, instance)
9456             raise errors.OpExecError("Could not start instance: %s" % msg)
9457
9458         if self.op.mode == constants.EXPORT_MODE_LOCAL:
9459           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9460         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9461           connect_timeout = constants.RIE_CONNECT_TIMEOUT
9462           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9463
9464           (key_name, _, _) = self.x509_key_name
9465
9466           dest_ca_pem = \
9467             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9468                                             self.dest_x509_ca)
9469
9470           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9471                                                      key_name, dest_ca_pem,
9472                                                      timeouts)
9473       finally:
9474         helper.Cleanup()
9475
9476       # Check for backwards compatibility
9477       assert len(dresults) == len(instance.disks)
9478       assert compat.all(isinstance(i, bool) for i in dresults), \
9479              "Not all results are boolean: %r" % dresults
9480
9481     finally:
9482       if activate_disks:
9483         feedback_fn("Deactivating disks for %s" % instance.name)
9484         _ShutdownInstanceDisks(self, instance)
9485
9486     # Remove instance if requested
9487     if self.op.remove_instance:
9488       if not (compat.all(dresults) and fin_resu):
9489         feedback_fn("Not removing instance %s as parts of the export failed" %
9490                     instance.name)
9491       else:
9492         feedback_fn("Removing instance %s" % instance.name)
9493         _RemoveInstance(self, feedback_fn, instance,
9494                         self.op.ignore_remove_failures)
9495
9496     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9497       self._CleanupExports(feedback_fn)
9498
9499     return fin_resu, dresults
9500
9501
9502 class LURemoveExport(NoHooksLU):
9503   """Remove exports related to the named instance.
9504
9505   """
9506   _OP_PARAMS = [
9507     _PInstanceName,
9508     ]
9509   REQ_BGL = False
9510
9511   def ExpandNames(self):
9512     self.needed_locks = {}
9513     # We need all nodes to be locked in order for RemoveExport to work, but we
9514     # don't need to lock the instance itself, as nothing will happen to it (and
9515     # we can remove exports also for a removed instance)
9516     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9517
9518   def Exec(self, feedback_fn):
9519     """Remove any export.
9520
9521     """
9522     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9523     # If the instance was not found we'll try with the name that was passed in.
9524     # This will only work if it was an FQDN, though.
9525     fqdn_warn = False
9526     if not instance_name:
9527       fqdn_warn = True
9528       instance_name = self.op.instance_name
9529
9530     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9531     exportlist = self.rpc.call_export_list(locked_nodes)
9532     found = False
9533     for node in exportlist:
9534       msg = exportlist[node].fail_msg
9535       if msg:
9536         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9537         continue
9538       if instance_name in exportlist[node].payload:
9539         found = True
9540         result = self.rpc.call_export_remove(node, instance_name)
9541         msg = result.fail_msg
9542         if msg:
9543           logging.error("Could not remove export for instance %s"
9544                         " on node %s: %s", instance_name, node, msg)
9545
9546     if fqdn_warn and not found:
9547       feedback_fn("Export not found. If trying to remove an export belonging"
9548                   " to a deleted instance please use its Fully Qualified"
9549                   " Domain Name.")
9550
9551
9552 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9553   """Generic tags LU.
9554
9555   This is an abstract class which is the parent of all the other tags LUs.
9556
9557   """
9558
9559   def ExpandNames(self):
9560     self.needed_locks = {}
9561     if self.op.kind == constants.TAG_NODE:
9562       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9563       self.needed_locks[locking.LEVEL_NODE] = self.op.name
9564     elif self.op.kind == constants.TAG_INSTANCE:
9565       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9566       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9567
9568   def CheckPrereq(self):
9569     """Check prerequisites.
9570
9571     """
9572     if self.op.kind == constants.TAG_CLUSTER:
9573       self.target = self.cfg.GetClusterInfo()
9574     elif self.op.kind == constants.TAG_NODE:
9575       self.target = self.cfg.GetNodeInfo(self.op.name)
9576     elif self.op.kind == constants.TAG_INSTANCE:
9577       self.target = self.cfg.GetInstanceInfo(self.op.name)
9578     else:
9579       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9580                                  str(self.op.kind), errors.ECODE_INVAL)
9581
9582
9583 class LUGetTags(TagsLU):
9584   """Returns the tags of a given object.
9585
9586   """
9587   _OP_PARAMS = [
9588     ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9589     ("name", _NoDefault, _TNonEmptyString),
9590     ]
9591   REQ_BGL = False
9592
9593   def Exec(self, feedback_fn):
9594     """Returns the tag list.
9595
9596     """
9597     return list(self.target.GetTags())
9598
9599
9600 class LUSearchTags(NoHooksLU):
9601   """Searches the tags for a given pattern.
9602
9603   """
9604   _OP_PARAMS = [
9605     ("pattern", _NoDefault, _TNonEmptyString),
9606     ]
9607   REQ_BGL = False
9608
9609   def ExpandNames(self):
9610     self.needed_locks = {}
9611
9612   def CheckPrereq(self):
9613     """Check prerequisites.
9614
9615     This checks the pattern passed for validity by compiling it.
9616
9617     """
9618     try:
9619       self.re = re.compile(self.op.pattern)
9620     except re.error, err:
9621       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9622                                  (self.op.pattern, err), errors.ECODE_INVAL)
9623
9624   def Exec(self, feedback_fn):
9625     """Returns the tag list.
9626
9627     """
9628     cfg = self.cfg
9629     tgts = [("/cluster", cfg.GetClusterInfo())]
9630     ilist = cfg.GetAllInstancesInfo().values()
9631     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9632     nlist = cfg.GetAllNodesInfo().values()
9633     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9634     results = []
9635     for path, target in tgts:
9636       for tag in target.GetTags():
9637         if self.re.search(tag):
9638           results.append((path, tag))
9639     return results
9640
9641
9642 class LUAddTags(TagsLU):
9643   """Sets a tag on a given object.
9644
9645   """
9646   _OP_PARAMS = [
9647     ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9648     ("name", _NoDefault, _TNonEmptyString),
9649     ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9650     ]
9651   REQ_BGL = False
9652
9653   def CheckPrereq(self):
9654     """Check prerequisites.
9655
9656     This checks the type and length of the tag name and value.
9657
9658     """
9659     TagsLU.CheckPrereq(self)
9660     for tag in self.op.tags:
9661       objects.TaggableObject.ValidateTag(tag)
9662
9663   def Exec(self, feedback_fn):
9664     """Sets the tag.
9665
9666     """
9667     try:
9668       for tag in self.op.tags:
9669         self.target.AddTag(tag)
9670     except errors.TagError, err:
9671       raise errors.OpExecError("Error while setting tag: %s" % str(err))
9672     self.cfg.Update(self.target, feedback_fn)
9673
9674
9675 class LUDelTags(TagsLU):
9676   """Delete a list of tags from a given object.
9677
9678   """
9679   _OP_PARAMS = [
9680     ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9681     ("name", _NoDefault, _TNonEmptyString),
9682     ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9683     ]
9684   REQ_BGL = False
9685
9686   def CheckPrereq(self):
9687     """Check prerequisites.
9688
9689     This checks that we have the given tag.
9690
9691     """
9692     TagsLU.CheckPrereq(self)
9693     for tag in self.op.tags:
9694       objects.TaggableObject.ValidateTag(tag)
9695     del_tags = frozenset(self.op.tags)
9696     cur_tags = self.target.GetTags()
9697     if not del_tags <= cur_tags:
9698       diff_tags = del_tags - cur_tags
9699       diff_names = ["'%s'" % tag for tag in diff_tags]
9700       diff_names.sort()
9701       raise errors.OpPrereqError("Tag(s) %s not found" %
9702                                  (",".join(diff_names)), errors.ECODE_NOENT)
9703
9704   def Exec(self, feedback_fn):
9705     """Remove the tag from the object.
9706
9707     """
9708     for tag in self.op.tags:
9709       self.target.RemoveTag(tag)
9710     self.cfg.Update(self.target, feedback_fn)
9711
9712
9713 class LUTestDelay(NoHooksLU):
9714   """Sleep for a specified amount of time.
9715
9716   This LU sleeps on the master and/or nodes for a specified amount of
9717   time.
9718
9719   """
9720   _OP_PARAMS = [
9721     ("duration", _NoDefault, _TFloat),
9722     ("on_master", True, _TBool),
9723     ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9724     ("repeat", 0, _TPositiveInt)
9725     ]
9726   REQ_BGL = False
9727
9728   def ExpandNames(self):
9729     """Expand names and set required locks.
9730
9731     This expands the node list, if any.
9732
9733     """
9734     self.needed_locks = {}
9735     if self.op.on_nodes:
9736       # _GetWantedNodes can be used here, but is not always appropriate to use
9737       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9738       # more information.
9739       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9740       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9741
9742   def _TestDelay(self):
9743     """Do the actual sleep.
9744
9745     """
9746     if self.op.on_master:
9747       if not utils.TestDelay(self.op.duration):
9748         raise errors.OpExecError("Error during master delay test")
9749     if self.op.on_nodes:
9750       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9751       for node, node_result in result.items():
9752         node_result.Raise("Failure during rpc call to node %s" % node)
9753
9754   def Exec(self, feedback_fn):
9755     """Execute the test delay opcode, with the wanted repetitions.
9756
9757     """
9758     if self.op.repeat == 0:
9759       self._TestDelay()
9760     else:
9761       top_value = self.op.repeat - 1
9762       for i in range(self.op.repeat):
9763         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9764         self._TestDelay()
9765
9766
9767 class IAllocator(object):
9768   """IAllocator framework.
9769
9770   An IAllocator instance has three sets of attributes:
9771     - cfg that is needed to query the cluster
9772     - input data (all members of the _KEYS class attribute are required)
9773     - four buffer attributes (in|out_data|text), that represent the
9774       input (to the external script) in text and data structure format,
9775       and the output from it, again in two formats
9776     - the result variables from the script (success, info, nodes) for
9777       easy usage
9778
9779   """
9780   # pylint: disable-msg=R0902
9781   # lots of instance attributes
9782   _ALLO_KEYS = [
9783     "name", "mem_size", "disks", "disk_template",
9784     "os", "tags", "nics", "vcpus", "hypervisor",
9785     ]
9786   _RELO_KEYS = [
9787     "name", "relocate_from",
9788     ]
9789   _EVAC_KEYS = [
9790     "evac_nodes",
9791     ]
9792
9793   def __init__(self, cfg, rpc, mode, **kwargs):
9794     self.cfg = cfg
9795     self.rpc = rpc
9796     # init buffer variables
9797     self.in_text = self.out_text = self.in_data = self.out_data = None
9798     # init all input fields so that pylint is happy
9799     self.mode = mode
9800     self.mem_size = self.disks = self.disk_template = None
9801     self.os = self.tags = self.nics = self.vcpus = None
9802     self.hypervisor = None
9803     self.relocate_from = None
9804     self.name = None
9805     self.evac_nodes = None
9806     # computed fields
9807     self.required_nodes = None
9808     # init result fields
9809     self.success = self.info = self.result = None
9810     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9811       keyset = self._ALLO_KEYS
9812       fn = self._AddNewInstance
9813     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9814       keyset = self._RELO_KEYS
9815       fn = self._AddRelocateInstance
9816     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9817       keyset = self._EVAC_KEYS
9818       fn = self._AddEvacuateNodes
9819     else:
9820       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9821                                    " IAllocator" % self.mode)
9822     for key in kwargs:
9823       if key not in keyset:
9824         raise errors.ProgrammerError("Invalid input parameter '%s' to"
9825                                      " IAllocator" % key)
9826       setattr(self, key, kwargs[key])
9827
9828     for key in keyset:
9829       if key not in kwargs:
9830         raise errors.ProgrammerError("Missing input parameter '%s' to"
9831                                      " IAllocator" % key)
9832     self._BuildInputData(fn)
9833
9834   def _ComputeClusterData(self):
9835     """Compute the generic allocator input data.
9836
9837     This is the data that is independent of the actual operation.
9838
9839     """
9840     cfg = self.cfg
9841     cluster_info = cfg.GetClusterInfo()
9842     # cluster data
9843     data = {
9844       "version": constants.IALLOCATOR_VERSION,
9845       "cluster_name": cfg.GetClusterName(),
9846       "cluster_tags": list(cluster_info.GetTags()),
9847       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9848       # we don't have job IDs
9849       }
9850     iinfo = cfg.GetAllInstancesInfo().values()
9851     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9852
9853     # node data
9854     node_results = {}
9855     node_list = cfg.GetNodeList()
9856
9857     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9858       hypervisor_name = self.hypervisor
9859     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9860       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9861     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9862       hypervisor_name = cluster_info.enabled_hypervisors[0]
9863
9864     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9865                                         hypervisor_name)
9866     node_iinfo = \
9867       self.rpc.call_all_instances_info(node_list,
9868                                        cluster_info.enabled_hypervisors)
9869     for nname, nresult in node_data.items():
9870       # first fill in static (config-based) values
9871       ninfo = cfg.GetNodeInfo(nname)
9872       pnr = {
9873         "tags": list(ninfo.GetTags()),
9874         "primary_ip": ninfo.primary_ip,
9875         "secondary_ip": ninfo.secondary_ip,
9876         "offline": ninfo.offline,
9877         "drained": ninfo.drained,
9878         "master_candidate": ninfo.master_candidate,
9879         }
9880
9881       if not (ninfo.offline or ninfo.drained):
9882         nresult.Raise("Can't get data for node %s" % nname)
9883         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9884                                 nname)
9885         remote_info = nresult.payload
9886
9887         for attr in ['memory_total', 'memory_free', 'memory_dom0',
9888                      'vg_size', 'vg_free', 'cpu_total']:
9889           if attr not in remote_info:
9890             raise errors.OpExecError("Node '%s' didn't return attribute"
9891                                      " '%s'" % (nname, attr))
9892           if not isinstance(remote_info[attr], int):
9893             raise errors.OpExecError("Node '%s' returned invalid value"
9894                                      " for '%s': %s" %
9895                                      (nname, attr, remote_info[attr]))
9896         # compute memory used by primary instances
9897         i_p_mem = i_p_up_mem = 0
9898         for iinfo, beinfo in i_list:
9899           if iinfo.primary_node == nname:
9900             i_p_mem += beinfo[constants.BE_MEMORY]
9901             if iinfo.name not in node_iinfo[nname].payload:
9902               i_used_mem = 0
9903             else:
9904               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9905             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9906             remote_info['memory_free'] -= max(0, i_mem_diff)
9907
9908             if iinfo.admin_up:
9909               i_p_up_mem += beinfo[constants.BE_MEMORY]
9910
9911         # compute memory used by instances
9912         pnr_dyn = {
9913           "total_memory": remote_info['memory_total'],
9914           "reserved_memory": remote_info['memory_dom0'],
9915           "free_memory": remote_info['memory_free'],
9916           "total_disk": remote_info['vg_size'],
9917           "free_disk": remote_info['vg_free'],
9918           "total_cpus": remote_info['cpu_total'],
9919           "i_pri_memory": i_p_mem,
9920           "i_pri_up_memory": i_p_up_mem,
9921           }
9922         pnr.update(pnr_dyn)
9923
9924       node_results[nname] = pnr
9925     data["nodes"] = node_results
9926
9927     # instance data
9928     instance_data = {}
9929     for iinfo, beinfo in i_list:
9930       nic_data = []
9931       for nic in iinfo.nics:
9932         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
9933         nic_dict = {"mac": nic.mac,
9934                     "ip": nic.ip,
9935                     "mode": filled_params[constants.NIC_MODE],
9936                     "link": filled_params[constants.NIC_LINK],
9937                    }
9938         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9939           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9940         nic_data.append(nic_dict)
9941       pir = {
9942         "tags": list(iinfo.GetTags()),
9943         "admin_up": iinfo.admin_up,
9944         "vcpus": beinfo[constants.BE_VCPUS],
9945         "memory": beinfo[constants.BE_MEMORY],
9946         "os": iinfo.os,
9947         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9948         "nics": nic_data,
9949         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9950         "disk_template": iinfo.disk_template,
9951         "hypervisor": iinfo.hypervisor,
9952         }
9953       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9954                                                  pir["disks"])
9955       instance_data[iinfo.name] = pir
9956
9957     data["instances"] = instance_data
9958
9959     self.in_data = data
9960
9961   def _AddNewInstance(self):
9962     """Add new instance data to allocator structure.
9963
9964     This in combination with _AllocatorGetClusterData will create the
9965     correct structure needed as input for the allocator.
9966
9967     The checks for the completeness of the opcode must have already been
9968     done.
9969
9970     """
9971     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9972
9973     if self.disk_template in constants.DTS_NET_MIRROR:
9974       self.required_nodes = 2
9975     else:
9976       self.required_nodes = 1
9977     request = {
9978       "name": self.name,
9979       "disk_template": self.disk_template,
9980       "tags": self.tags,
9981       "os": self.os,
9982       "vcpus": self.vcpus,
9983       "memory": self.mem_size,
9984       "disks": self.disks,
9985       "disk_space_total": disk_space,
9986       "nics": self.nics,
9987       "required_nodes": self.required_nodes,
9988       }
9989     return request
9990
9991   def _AddRelocateInstance(self):
9992     """Add relocate instance data to allocator structure.
9993
9994     This in combination with _IAllocatorGetClusterData will create the
9995     correct structure needed as input for the allocator.
9996
9997     The checks for the completeness of the opcode must have already been
9998     done.
9999
10000     """
10001     instance = self.cfg.GetInstanceInfo(self.name)
10002     if instance is None:
10003       raise errors.ProgrammerError("Unknown instance '%s' passed to"
10004                                    " IAllocator" % self.name)
10005
10006     if instance.disk_template not in constants.DTS_NET_MIRROR:
10007       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10008                                  errors.ECODE_INVAL)
10009
10010     if len(instance.secondary_nodes) != 1:
10011       raise errors.OpPrereqError("Instance has not exactly one secondary node",
10012                                  errors.ECODE_STATE)
10013
10014     self.required_nodes = 1
10015     disk_sizes = [{'size': disk.size} for disk in instance.disks]
10016     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10017
10018     request = {
10019       "name": self.name,
10020       "disk_space_total": disk_space,
10021       "required_nodes": self.required_nodes,
10022       "relocate_from": self.relocate_from,
10023       }
10024     return request
10025
10026   def _AddEvacuateNodes(self):
10027     """Add evacuate nodes data to allocator structure.
10028
10029     """
10030     request = {
10031       "evac_nodes": self.evac_nodes
10032       }
10033     return request
10034
10035   def _BuildInputData(self, fn):
10036     """Build input data structures.
10037
10038     """
10039     self._ComputeClusterData()
10040
10041     request = fn()
10042     request["type"] = self.mode
10043     self.in_data["request"] = request
10044
10045     self.in_text = serializer.Dump(self.in_data)
10046
10047   def Run(self, name, validate=True, call_fn=None):
10048     """Run an instance allocator and return the results.
10049
10050     """
10051     if call_fn is None:
10052       call_fn = self.rpc.call_iallocator_runner
10053
10054     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10055     result.Raise("Failure while running the iallocator script")
10056
10057     self.out_text = result.payload
10058     if validate:
10059       self._ValidateResult()
10060
10061   def _ValidateResult(self):
10062     """Process the allocator results.
10063
10064     This will process and if successful save the result in
10065     self.out_data and the other parameters.
10066
10067     """
10068     try:
10069       rdict = serializer.Load(self.out_text)
10070     except Exception, err:
10071       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10072
10073     if not isinstance(rdict, dict):
10074       raise errors.OpExecError("Can't parse iallocator results: not a dict")
10075
10076     # TODO: remove backwards compatiblity in later versions
10077     if "nodes" in rdict and "result" not in rdict:
10078       rdict["result"] = rdict["nodes"]
10079       del rdict["nodes"]
10080
10081     for key in "success", "info", "result":
10082       if key not in rdict:
10083         raise errors.OpExecError("Can't parse iallocator results:"
10084                                  " missing key '%s'" % key)
10085       setattr(self, key, rdict[key])
10086
10087     if not isinstance(rdict["result"], list):
10088       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10089                                " is not a list")
10090     self.out_data = rdict
10091
10092
10093 class LUTestAllocator(NoHooksLU):
10094   """Run allocator tests.
10095
10096   This LU runs the allocator tests
10097
10098   """
10099   _OP_PARAMS = [
10100     ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10101     ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10102     ("name", _NoDefault, _TNonEmptyString),
10103     ("nics", _NoDefault, _TOr(_TNone, _TListOf(
10104       _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10105                _TOr(_TNone, _TNonEmptyString))))),
10106     ("disks", _NoDefault, _TOr(_TNone, _TList)),
10107     ("hypervisor", None, _TMaybeString),
10108     ("allocator", None, _TMaybeString),
10109     ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
10110     ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
10111     ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
10112     ("os", None, _TMaybeString),
10113     ("disk_template", None, _TMaybeString),
10114     ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
10115     ]
10116
10117   def CheckPrereq(self):
10118     """Check prerequisites.
10119
10120     This checks the opcode parameters depending on the director and mode test.
10121
10122     """
10123     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10124       for attr in ["mem_size", "disks", "disk_template",
10125                    "os", "tags", "nics", "vcpus"]:
10126         if not hasattr(self.op, attr):
10127           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10128                                      attr, errors.ECODE_INVAL)
10129       iname = self.cfg.ExpandInstanceName(self.op.name)
10130       if iname is not None:
10131         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10132                                    iname, errors.ECODE_EXISTS)
10133       if not isinstance(self.op.nics, list):
10134         raise errors.OpPrereqError("Invalid parameter 'nics'",
10135                                    errors.ECODE_INVAL)
10136       if not isinstance(self.op.disks, list):
10137         raise errors.OpPrereqError("Invalid parameter 'disks'",
10138                                    errors.ECODE_INVAL)
10139       for row in self.op.disks:
10140         if (not isinstance(row, dict) or
10141             "size" not in row or
10142             not isinstance(row["size"], int) or
10143             "mode" not in row or
10144             row["mode"] not in ['r', 'w']):
10145           raise errors.OpPrereqError("Invalid contents of the 'disks'"
10146                                      " parameter", errors.ECODE_INVAL)
10147       if self.op.hypervisor is None:
10148         self.op.hypervisor = self.cfg.GetHypervisorType()
10149     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10150       fname = _ExpandInstanceName(self.cfg, self.op.name)
10151       self.op.name = fname
10152       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10153     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10154       if not hasattr(self.op, "evac_nodes"):
10155         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10156                                    " opcode input", errors.ECODE_INVAL)
10157     else:
10158       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10159                                  self.op.mode, errors.ECODE_INVAL)
10160
10161     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10162       if self.op.allocator is None:
10163         raise errors.OpPrereqError("Missing allocator name",
10164                                    errors.ECODE_INVAL)
10165     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10166       raise errors.OpPrereqError("Wrong allocator test '%s'" %
10167                                  self.op.direction, errors.ECODE_INVAL)
10168
10169   def Exec(self, feedback_fn):
10170     """Run the allocator test.
10171
10172     """
10173     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10174       ial = IAllocator(self.cfg, self.rpc,
10175                        mode=self.op.mode,
10176                        name=self.op.name,
10177                        mem_size=self.op.mem_size,
10178                        disks=self.op.disks,
10179                        disk_template=self.op.disk_template,
10180                        os=self.op.os,
10181                        tags=self.op.tags,
10182                        nics=self.op.nics,
10183                        vcpus=self.op.vcpus,
10184                        hypervisor=self.op.hypervisor,
10185                        )
10186     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10187       ial = IAllocator(self.cfg, self.rpc,
10188                        mode=self.op.mode,
10189                        name=self.op.name,
10190                        relocate_from=list(self.relocate_from),
10191                        )
10192     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10193       ial = IAllocator(self.cfg, self.rpc,
10194                        mode=self.op.mode,
10195                        evac_nodes=self.op.evac_nodes)
10196     else:
10197       raise errors.ProgrammerError("Uncatched mode %s in"
10198                                    " LUTestAllocator.Exec", self.op.mode)
10199
10200     if self.op.direction == constants.IALLOCATOR_DIR_IN:
10201       result = ial.in_text
10202     else:
10203       ial.Run(self.op.allocator, validate=False)
10204       result = ial.out_text
10205     return result