4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
40 from ganeti import ssh
41 from ganeti import utils
42 from ganeti import errors
43 from ganeti import hypervisor
44 from ganeti import locking
45 from ganeti import constants
46 from ganeti import objects
47 from ganeti import serializer
48 from ganeti import ssconf
49 from ganeti import uidpool
50 from ganeti import compat
51 from ganeti import masterd
53 import ganeti.masterd.instance # pylint: disable-msg=W0611
56 # Modifiable default values; need to define these here before the
60 """Returns an empty list.
67 """Returns an empty dict.
73 #: The without-default default value
77 #: The no-type (value too complex to check in the type system)
83 """Checks if the given value is not None.
86 return val is not None
90 """Checks if the given value is None.
97 """Checks if the given value is a boolean.
100 return isinstance(val, bool)
104 """Checks if the given value is an integer.
107 return isinstance(val, int)
111 """Checks if the given value is a float.
114 return isinstance(val, float)
118 """Checks if the given value is a string.
121 return isinstance(val, basestring)
125 """Checks if a given value evaluates to a boolean True value.
131 def _TElemOf(target_list):
132 """Builds a function that checks if a given value is a member of a list.
135 return lambda val: val in target_list
140 """Checks if the given value is a list.
143 return isinstance(val, list)
147 """Checks if the given value is a dictionary.
150 return isinstance(val, dict)
155 """Combine multiple functions using an AND operation.
159 return compat.all(t(val) for t in args)
164 """Combine multiple functions using an AND operation.
168 return compat.any(t(val) for t in args)
174 #: a non-empty string
175 _TNonEmptyString = _TAnd(_TString, _TTrue)
178 #: a maybe non-empty string
179 _TMaybeString = _TOr(_TNonEmptyString, _TNone)
182 #: a maybe boolean (bool or none)
183 _TMaybeBool = _TOr(_TBool, _TNone)
186 #: a positive integer
187 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
189 #: a strictly positive integer
190 _TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
193 def _TListOf(my_type):
194 """Checks if a given value is a list with all elements of the same type.
198 lambda lst: compat.all(my_type(v) for v in lst))
201 def _TDictOf(key_type, val_type):
202 """Checks a dict type for the type of its key/values.
206 lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
207 and compat.all(val_type(v)
208 for v in my_dict.values())))
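# Illustrative sketch (not part of the original module): the simple checkers
# above compose into more specific validators. For instance, a dict mapping
# non-empty strings to lists of non-negative integers could be described as:
#
#   _TExampleMap = _TDictOf(_TNonEmptyString, _TListOf(_TPositiveInt))
#   _TExampleMap({"nodes": [1, 2, 3]})   # -> True
#   _TExampleMap({"nodes": [1, -2]})     # -> False (negative element)
#
# The _TExampleMap name is hypothetical and used only for illustration.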
211 # Common opcode attributes
213 #: output fields for a query operation
214 _POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
217 #: the shutdown timeout
218 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
221 #: the force parameter
222 _PForce = ("force", False, _TBool)
224 #: a required instance name (for single-instance LUs)
225 _PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
228 #: a required node name (for single-node LUs)
229 _PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
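# Illustrative sketch (LUExampleQuery is a hypothetical class, not defined in
# this module): an LU declares its opcode parameters as (name, default, check)
# tuples in _OP_PARAMS, reusing the common attributes above where possible:
#
#   class LUExampleQuery(NoHooksLU):
#     _OP_PARAMS = [
#       _POutputFields,
#       ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
#       ("use_locking", False, _TBool),
#       ]
#
# Missing attributes get the listed default (or raise OpPrereqError when the
# default is _NoDefault), and each value must satisfy its check callable, as
# done in LogicalUnit.__init__ below.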
233 class LogicalUnit(object):
234 """Logical Unit base class.
236 Subclasses must follow these rules:
237 - implement ExpandNames
238 - implement CheckPrereq (except when tasklets are used)
239 - implement Exec (except when tasklets are used)
240 - implement BuildHooksEnv
241 - redefine HPATH and HTYPE
242 - optionally redefine their run requirements:
243 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
245 Note that all commands require root permissions.
247 @ivar dry_run_result: the value (if any) that will be returned to the caller
248 in dry-run mode (signalled by opcode dry_run parameter)
249 @cvar _OP_PARAMS: a list of opcode attributes, their default values
250 they should get if not already defined, and types they must match
258 def __init__(self, processor, op, context, rpc):
259 """Constructor for LogicalUnit.
261 This needs to be overridden in derived classes in order to check op
265 self.proc = processor
267 self.cfg = context.cfg
268 self.context = context
270 # Dicts used to declare locking needs to mcpu
271 self.needed_locks = None
272 self.acquired_locks = {}
273 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
275 self.remove_locks = {}
276 # Used to force good behavior when calling helper functions
277 self.recalculate_locks = {}
280 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
281 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
282 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
283 # support for dry-run
284 self.dry_run_result = None
285 # support for generic debug attribute
286 if (not hasattr(self.op, "debug_level") or
287 not isinstance(self.op.debug_level, int)):
288 self.op.debug_level = 0
293 # The new kind-of-type-system
294 op_id = self.op.OP_ID
295 for attr_name, aval, test in self._OP_PARAMS:
296 if not hasattr(op, attr_name):
297 if aval == _NoDefault:
298 raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
299 (op_id, attr_name), errors.ECODE_INVAL)
305 setattr(self.op, attr_name, dval)
306 attr_val = getattr(op, attr_name)
310 if not callable(test):
311 raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
312 " given type is not a proper type (%s)" %
313 (op_id, attr_name, test))
314 if not test(attr_val):
315 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
316 self.op.OP_ID, attr_name, type(attr_val), attr_val)
317 raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
318 (op_id, attr_name), errors.ECODE_INVAL)
320 self.CheckArguments()
323 """Returns the SshRunner object
327 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
330 ssh = property(fget=__GetSSH)
332 def CheckArguments(self):
333 """Check syntactic validity for the opcode arguments.
335 This method is for doing a simple syntactic check and ensuring the
336 validity of opcode parameters, without any cluster-related
337 checks. While the same can be accomplished in ExpandNames and/or
338 CheckPrereq, doing these separately is better because:
340 - ExpandNames is left as purely a lock-related function
341 - CheckPrereq is run after we have acquired locks (and possible
344 The function is allowed to change the self.op attribute so that
345 later methods can no longer worry about missing parameters.
350 def ExpandNames(self):
351 """Expand names for this LU.
353 This method is called before starting to execute the opcode, and it should
354 update all the parameters of the opcode to their canonical form (e.g. a
355 short node name must be fully expanded after this method has successfully
356 completed). This way locking, hooks, logging, etc. can work correctly.
358 LUs which implement this method must also populate the self.needed_locks
359 member, as a dict with lock levels as keys, and a list of needed lock names
362 - use an empty dict if you don't need any lock
363 - if you don't need any lock at a particular level omit that level
364 - don't put anything for the BGL level
365 - if you want all locks at a level use locking.ALL_SET as a value
367 If you need to share locks (rather than acquire them exclusively) at one
368 level you can modify self.share_locks, setting a true value (usually 1) for
369 that level. By default locks are not shared.
371 This function can also define a list of tasklets, which then will be
372 executed in order instead of the usual LU-level CheckPrereq and Exec
373 functions, if those are not defined by the LU.
377 # Acquire all nodes and one instance
378 self.needed_locks = {
379 locking.LEVEL_NODE: locking.ALL_SET,
380 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
382 # Acquire just two nodes
383 self.needed_locks = {
384 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
387 self.needed_locks = {} # No, you can't leave it to the default value None
390 # The implementation of this method is mandatory only if the new LU is
391 # concurrent, so that old LUs don't need to be changed all at the same
394 self.needed_locks = {} # Exclusive LUs don't need locks.
396 raise NotImplementedError
398 def DeclareLocks(self, level):
399 """Declare LU locking needs for a level
401 While most LUs can just declare their locking needs at ExpandNames time,
402 sometimes there's the need to calculate some locks after having acquired
403 the ones before. This function is called just before acquiring locks at a
404 particular level, but after acquiring the ones at lower levels, and permits
405 such calculations. It can be used to modify self.needed_locks, and by
406 default it does nothing.
408 This function is only called if you have something already set in
409 self.needed_locks for the level.
411 @param level: Locking level which is going to be locked
412 @type level: member of ganeti.locking.LEVELS
416 def CheckPrereq(self):
417 """Check prerequisites for this LU.
419 This method should check that the prerequisites for the execution
420 of this LU are fulfilled. It can do internode communication, but
421 it should be idempotent - no cluster or system changes are
424 The method should raise errors.OpPrereqError in case something is
425 not fulfilled. Its return value is ignored.
427 This method should also update all the parameters of the opcode to
428 their canonical form if it hasn't been done by ExpandNames before.
431 if self.tasklets is not None:
432 for (idx, tl) in enumerate(self.tasklets):
433 logging.debug("Checking prerequisites for tasklet %s/%s",
434 idx + 1, len(self.tasklets))
439 def Exec(self, feedback_fn):
442 This method should implement the actual work. It should raise
443 errors.OpExecError for failures that are somewhat dealt with in
447 if self.tasklets is not None:
448 for (idx, tl) in enumerate(self.tasklets):
449 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
452 raise NotImplementedError
454 def BuildHooksEnv(self):
455 """Build hooks environment for this LU.
457 This method should return a three-element tuple consisting of: a dict
458 containing the environment that will be used for running the
459 specific hook for this LU, a list of node names on which the hook
460 should run before the execution, and a list of node names on which
461 the hook should run after the execution.
463 The keys of the dict must not have the 'GANETI_' prefix, as this will
464 be handled in the hooks runner. Also note that additional keys will be
465 added by the hooks runner. If the LU doesn't define any
466 environment, an empty dict (and not None) should be returned.
468 If the hook should run on no nodes, return an empty list (and not None).
470 Note that if the HPATH for a LU class is None, this function will
474 raise NotImplementedError
476 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
477 """Notify the LU about the results of its hooks.
479 This method is called every time a hooks phase is executed, and notifies
480 the Logical Unit about the hooks' result. The LU can then use it to alter
481 its result based on the hooks. By default the method does nothing and the
482 previous result is passed back unchanged but any LU can define it if it
483 wants to use the local cluster hook-scripts somehow.
485 @param phase: one of L{constants.HOOKS_PHASE_POST} or
486 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
487 @param hook_results: the results of the multi-node hooks rpc call
488 @param feedback_fn: function used to send feedback back to the caller
489 @param lu_result: the previous Exec result this LU had, or None
491 @return: the new Exec result, based on the previous result
495 # API must be kept, thus we ignore the unused-argument and
496 # could-be-a-function warnings
497 # pylint: disable-msg=W0613,R0201
500 def _ExpandAndLockInstance(self):
501 """Helper function to expand and lock an instance.
503 Many LUs that work on an instance take its name in self.op.instance_name
504 and need to expand it and then declare the expanded name for locking. This
505 function does it, and then updates self.op.instance_name to the expanded
506 name. It also initializes needed_locks as a dict, if this hasn't been done
510 if self.needed_locks is None:
511 self.needed_locks = {}
513 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
514 "_ExpandAndLockInstance called with instance-level locks set"
515 self.op.instance_name = _ExpandInstanceName(self.cfg,
516 self.op.instance_name)
517 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
519 def _LockInstancesNodes(self, primary_only=False):
520 """Helper function to declare instances' nodes for locking.
522 This function should be called after locking one or more instances to lock
523 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
524 with all primary or secondary nodes for instances already locked and
525 present in self.needed_locks[locking.LEVEL_INSTANCE].
527 It should be called from DeclareLocks, and for safety only works if
528 self.recalculate_locks[locking.LEVEL_NODE] is set.
530 In the future it may grow parameters to just lock some instances' nodes, or
531 to just lock primary or secondary nodes, if needed.
533 It should be called in DeclareLocks in a way similar to::
535 if level == locking.LEVEL_NODE:
536 self._LockInstancesNodes()
538 @type primary_only: boolean
539 @param primary_only: only lock primary nodes of locked instances
542 assert locking.LEVEL_NODE in self.recalculate_locks, \
543 "_LockInstancesNodes helper function called with no nodes to recalculate"
545 # TODO: check if we've really been called with the instance locks held
547 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
548 # future we might want to have different behaviors depending on the value
549 # of self.recalculate_locks[locking.LEVEL_NODE]
551 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
552 instance = self.context.cfg.GetInstanceInfo(instance_name)
553 wanted_nodes.append(instance.primary_node)
555 wanted_nodes.extend(instance.secondary_nodes)
557 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
558 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
559 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
560 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
562 del self.recalculate_locks[locking.LEVEL_NODE]
565 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
566 """Simple LU which runs no hooks.
568 This LU is intended as a parent for other LogicalUnits which will
569 run no hooks, in order to reduce duplicate code.
575 def BuildHooksEnv(self):
576 """Empty BuildHooksEnv for NoHooksLu.
578 This just raises an error.
581 assert False, "BuildHooksEnv called for NoHooksLUs"
585 """Tasklet base class.
587 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
588 they can mix legacy code with tasklets. Locking needs to be done in the LU,
589 tasklets know nothing about locks.
591 Subclasses must follow these rules:
592 - Implement CheckPrereq
596 def __init__(self, lu):
603 def CheckPrereq(self):
604 """Check prerequisites for this tasklets.
606 This method should check whether the prerequisites for the execution of
607 this tasklet are fulfilled. It can do internode communication, but it
608 should be idempotent - no cluster or system changes are allowed.
610 The method should raise errors.OpPrereqError in case something is not
611 fulfilled. Its return value is ignored.
613 This method should also update all parameters to their canonical form if it
614 hasn't been done before.
619 def Exec(self, feedback_fn):
620 """Execute the tasklet.
622 This method should implement the actual work. It should raise
623 errors.OpExecError for failures that are somewhat dealt with in code, or
627 raise NotImplementedError
630 def _GetWantedNodes(lu, nodes):
631 """Returns list of checked and expanded node names.
633 @type lu: L{LogicalUnit}
634 @param lu: the logical unit on whose behalf we execute
636 @param nodes: list of node names or None for all nodes
638 @return: the list of nodes, sorted
639 @raise errors.ProgrammerError: if the nodes parameter is wrong type
643 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
644 " non-empty list of nodes whose name is to be expanded.")
646 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
647 return utils.NiceSort(wanted)
650 def _GetWantedInstances(lu, instances):
651 """Returns list of checked and expanded instance names.
653 @type lu: L{LogicalUnit}
654 @param lu: the logical unit on whose behalf we execute
655 @type instances: list
656 @param instances: list of instance names or None for all instances
658 @return: the list of instances, sorted
659 @raise errors.OpPrereqError: if the instances parameter is wrong type
660 @raise errors.OpPrereqError: if any of the passed instances is not found
664 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
666 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
670 def _GetUpdatedParams(old_params, update_dict,
671 use_default=True, use_none=False):
672 """Return the new version of a parameter dictionary.
674 @type old_params: dict
675 @param old_params: old parameters
676 @type update_dict: dict
677 @param update_dict: dict containing new parameter values, or
678 constants.VALUE_DEFAULT to reset the parameter to its default
680 @type use_default: boolean
681 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
682 values as 'to be deleted' values
683 @type use_none: boolean
684 @param use_none: whether to recognise C{None} values as 'to be
687 @return: the new parameter dictionary
690 params_copy = copy.deepcopy(old_params)
691 for key, val in update_dict.iteritems():
692 if ((use_default and val == constants.VALUE_DEFAULT) or
693 (use_none and val is None)):
699 params_copy[key] = val
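# Illustrative sketch of _GetUpdatedParams (keys and values are made up): with
# old_params = {"mem": 128, "vcpus": 2} and
# update_dict = {"mem": constants.VALUE_DEFAULT, "vcpus": 4}, the result is
# {"vcpus": 4}: "mem" is dropped because VALUE_DEFAULT (with use_default=True)
# means "fall back to the default", while "vcpus" is simply overwritten.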
703 def _CheckOutputFields(static, dynamic, selected):
704 """Checks whether all selected fields are valid.
706 @type static: L{utils.FieldSet}
707 @param static: static fields set
708 @type dynamic: L{utils.FieldSet}
709 @param dynamic: dynamic fields set
716 delta = f.NonMatching(selected)
718 raise errors.OpPrereqError("Unknown output fields selected: %s"
719 % ",".join(delta), errors.ECODE_INVAL)
722 def _CheckGlobalHvParams(params):
723 """Validates that given hypervisor params are not global ones.
725 This will ensure that instances don't get customised versions of
729 used_globals = constants.HVC_GLOBALS.intersection(params)
731 msg = ("The following hypervisor parameters are global and cannot"
732 " be customized at instance level, please modify them at"
733 " cluster level: %s" % utils.CommaJoin(used_globals))
734 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
737 def _CheckNodeOnline(lu, node):
738 """Ensure that a given node is online.
740 @param lu: the LU on behalf of which we make the check
741 @param node: the node to check
742 @raise errors.OpPrereqError: if the node is offline
745 if lu.cfg.GetNodeInfo(node).offline:
746 raise errors.OpPrereqError("Can't use offline node %s" % node,
750 def _CheckNodeNotDrained(lu, node):
751 """Ensure that a given node is not drained.
753 @param lu: the LU on behalf of which we make the check
754 @param node: the node to check
755 @raise errors.OpPrereqError: if the node is drained
758 if lu.cfg.GetNodeInfo(node).drained:
759 raise errors.OpPrereqError("Can't use drained node %s" % node,
763 def _CheckNodeHasOS(lu, node, os_name, force_variant):
764 """Ensure that a node supports a given OS.
766 @param lu: the LU on behalf of which we make the check
767 @param node: the node to check
768 @param os_name: the OS to query about
769 @param force_variant: whether to ignore variant errors
770 @raise errors.OpPrereqError: if the node does not support the OS
773 result = lu.rpc.call_os_get(node, os_name)
774 result.Raise("OS '%s' not in supported OS list for node %s" %
776 prereq=True, ecode=errors.ECODE_INVAL)
777 if not force_variant:
778 _CheckOSVariant(result.payload, os_name)
781 def _RequireFileStorage():
782 """Checks that file storage is enabled.
784 @raise errors.OpPrereqError: when file storage is disabled
787 if not constants.ENABLE_FILE_STORAGE:
788 raise errors.OpPrereqError("File storage disabled at configure time",
792 def _CheckDiskTemplate(template):
793 """Ensure a given disk template is valid.
796 if template not in constants.DISK_TEMPLATES:
797 msg = ("Invalid disk template name '%s', valid templates are: %s" %
798 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
799 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
800 if template == constants.DT_FILE:
801 _RequireFileStorage()
805 def _CheckStorageType(storage_type):
806 """Ensure a given storage type is valid.
809 if storage_type not in constants.VALID_STORAGE_TYPES:
810 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
812 if storage_type == constants.ST_FILE:
813 _RequireFileStorage()
817 def _GetClusterDomainSecret():
818 """Reads the cluster domain secret.
821 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
825 def _CheckInstanceDown(lu, instance, reason):
826 """Ensure that an instance is not running."""
827 if instance.admin_up:
828 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
829 (instance.name, reason), errors.ECODE_STATE)
831 pnode = instance.primary_node
832 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
833 ins_l.Raise("Can't contact node %s for instance information" % pnode,
834 prereq=True, ecode=errors.ECODE_ENVIRON)
836 if instance.name in ins_l.payload:
837 raise errors.OpPrereqError("Instance %s is running, %s" %
838 (instance.name, reason), errors.ECODE_STATE)
841 def _ExpandItemName(fn, name, kind):
842 """Expand an item name.
844 @param fn: the function to use for expansion
845 @param name: requested item name
846 @param kind: text description ('Node' or 'Instance')
847 @return: the resolved (full) name
848 @raise errors.OpPrereqError: if the item is not found
852 if full_name is None:
853 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
858 def _ExpandNodeName(cfg, name):
859 """Wrapper over L{_ExpandItemName} for nodes."""
860 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
863 def _ExpandInstanceName(cfg, name):
864 """Wrapper over L{_ExpandItemName} for instance."""
865 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
868 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
869 memory, vcpus, nics, disk_template, disks,
870 bep, hvp, hypervisor_name):
871 """Builds instance related env variables for hooks
873 This builds the hook environment from individual variables.
876 @param name: the name of the instance
877 @type primary_node: string
878 @param primary_node: the name of the instance's primary node
879 @type secondary_nodes: list
880 @param secondary_nodes: list of secondary nodes as strings
881 @type os_type: string
882 @param os_type: the name of the instance's OS
883 @type status: boolean
884 @param status: the should_run status of the instance
886 @param memory: the memory size of the instance
888 @param vcpus: the count of VCPUs the instance has
890 @param nics: list of tuples (ip, mac, mode, link) representing
891 the NICs the instance has
892 @type disk_template: string
893 @param disk_template: the disk template of the instance
895 @param disks: the list of (size, mode) pairs
897 @param bep: the backend parameters for the instance
899 @param hvp: the hypervisor parameters for the instance
900 @type hypervisor_name: string
901 @param hypervisor_name: the hypervisor for the instance
903 @return: the hook environment for this instance
912 "INSTANCE_NAME": name,
913 "INSTANCE_PRIMARY": primary_node,
914 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
915 "INSTANCE_OS_TYPE": os_type,
916 "INSTANCE_STATUS": str_status,
917 "INSTANCE_MEMORY": memory,
918 "INSTANCE_VCPUS": vcpus,
919 "INSTANCE_DISK_TEMPLATE": disk_template,
920 "INSTANCE_HYPERVISOR": hypervisor_name,
924 nic_count = len(nics)
925 for idx, (ip, mac, mode, link) in enumerate(nics):
928 env["INSTANCE_NIC%d_IP" % idx] = ip
929 env["INSTANCE_NIC%d_MAC" % idx] = mac
930 env["INSTANCE_NIC%d_MODE" % idx] = mode
931 env["INSTANCE_NIC%d_LINK" % idx] = link
932 if mode == constants.NIC_MODE_BRIDGED:
933 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
937 env["INSTANCE_NIC_COUNT"] = nic_count
940 disk_count = len(disks)
941 for idx, (size, mode) in enumerate(disks):
942 env["INSTANCE_DISK%d_SIZE" % idx] = size
943 env["INSTANCE_DISK%d_MODE" % idx] = mode
947 env["INSTANCE_DISK_COUNT"] = disk_count
949 for source, kind in [(bep, "BE"), (hvp, "HV")]:
950 for key, value in source.items():
951 env["INSTANCE_%s_%s" % (kind, key)] = value
956 def _NICListToTuple(lu, nics):
957 """Build a list of nic information tuples.
959 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
960 value in LUQueryInstanceData.
962 @type lu: L{LogicalUnit}
963 @param lu: the logical unit on whose behalf we execute
964 @type nics: list of L{objects.NIC}
965 @param nics: list of nics to convert to hooks tuples
969 cluster = lu.cfg.GetClusterInfo()
973 filled_params = cluster.SimpleFillNIC(nic.nicparams)
974 mode = filled_params[constants.NIC_MODE]
975 link = filled_params[constants.NIC_LINK]
976 hooks_nics.append((ip, mac, mode, link))
980 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
981 """Builds instance related env variables for hooks from an object.
983 @type lu: L{LogicalUnit}
984 @param lu: the logical unit on whose behalf we execute
985 @type instance: L{objects.Instance}
986 @param instance: the instance for which we should build the
989 @param override: dictionary with key/values that will override
992 @return: the hook environment dictionary
995 cluster = lu.cfg.GetClusterInfo()
996 bep = cluster.FillBE(instance)
997 hvp = cluster.FillHV(instance)
999 'name': instance.name,
1000 'primary_node': instance.primary_node,
1001 'secondary_nodes': instance.secondary_nodes,
1002 'os_type': instance.os,
1003 'status': instance.admin_up,
1004 'memory': bep[constants.BE_MEMORY],
1005 'vcpus': bep[constants.BE_VCPUS],
1006 'nics': _NICListToTuple(lu, instance.nics),
1007 'disk_template': instance.disk_template,
1008 'disks': [(disk.size, disk.mode) for disk in instance.disks],
1011 'hypervisor_name': instance.hypervisor,
1014 args.update(override)
1015 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1018 def _AdjustCandidatePool(lu, exceptions):
1019 """Adjust the candidate pool after node operations.
1022 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1024 lu.LogInfo("Promoted nodes to master candidate role: %s",
1025 utils.CommaJoin(node.name for node in mod_list))
1026 for name in mod_list:
1027 lu.context.ReaddNode(name)
1028 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1030 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1034 def _DecideSelfPromotion(lu, exceptions=None):
1035 """Decide whether I should promote myself as a master candidate.
1038 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1039 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1040 # the new node will increase mc_max by one, so:
1041 mc_should = min(mc_should + 1, cp_size)
1042 return mc_now < mc_should
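# Illustrative sketch (numbers are made up): with candidate_pool_size = 10,
# three current master candidates and GetMasterCandidateStats() reporting
# three desired, adding this node bumps the desired count to min(3 + 1, 10)
# = 4, so 3 < 4 holds and the new node promotes itself to master candidate.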
1045 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1046 """Check that the brigdes needed by a list of nics exist.
1049 cluster = lu.cfg.GetClusterInfo()
1050 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1051 brlist = [params[constants.NIC_LINK] for params in paramslist
1052 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1054 result = lu.rpc.call_bridges_exist(target_node, brlist)
1055 result.Raise("Error checking bridges on destination node '%s'" %
1056 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1059 def _CheckInstanceBridgesExist(lu, instance, node=None):
1060 """Check that the brigdes needed by an instance exist.
1064 node = instance.primary_node
1065 _CheckNicsBridgesExist(lu, instance.nics, node)
1068 def _CheckOSVariant(os_obj, name):
1069 """Check whether an OS name conforms to the os variants specification.
1071 @type os_obj: L{objects.OS}
1072 @param os_obj: OS object to check
1074 @param name: OS name passed by the user, to check for validity
1077 if not os_obj.supported_variants:
1080 variant = name.split("+", 1)[1]
1082 raise errors.OpPrereqError("OS name must include a variant",
1085 if variant not in os_obj.supported_variants:
1086 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1089 def _GetNodeInstancesInner(cfg, fn):
1090 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1093 def _GetNodeInstances(cfg, node_name):
1094 """Returns a list of all primary and secondary instances on a node.
1098 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1101 def _GetNodePrimaryInstances(cfg, node_name):
1102 """Returns primary instances on a node.
1105 return _GetNodeInstancesInner(cfg,
1106 lambda inst: node_name == inst.primary_node)
1109 def _GetNodeSecondaryInstances(cfg, node_name):
1110 """Returns secondary instances on a node.
1113 return _GetNodeInstancesInner(cfg,
1114 lambda inst: node_name in inst.secondary_nodes)
1117 def _GetStorageTypeArgs(cfg, storage_type):
1118 """Returns the arguments for a storage type.
1121 # Special case for file storage
1122 if storage_type == constants.ST_FILE:
1123 # storage.FileStorage wants a list of storage directories
1124 return [[cfg.GetFileStorageDir()]]
1129 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1132 for dev in instance.disks:
1133 cfg.SetDiskID(dev, node_name)
1135 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1136 result.Raise("Failed to get disk status from node %s" % node_name,
1137 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1139 for idx, bdev_status in enumerate(result.payload):
1140 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1146 class LUPostInitCluster(LogicalUnit):
1147 """Logical unit for running hooks after cluster initialization.
1150 HPATH = "cluster-init"
1151 HTYPE = constants.HTYPE_CLUSTER
1153 def BuildHooksEnv(self):
1157 env = {"OP_TARGET": self.cfg.GetClusterName()}
1158 mn = self.cfg.GetMasterNode()
1159 return env, [], [mn]
1161 def Exec(self, feedback_fn):
1168 class LUDestroyCluster(LogicalUnit):
1169 """Logical unit for destroying the cluster.
1172 HPATH = "cluster-destroy"
1173 HTYPE = constants.HTYPE_CLUSTER
1175 def BuildHooksEnv(self):
1179 env = {"OP_TARGET": self.cfg.GetClusterName()}
1182 def CheckPrereq(self):
1183 """Check prerequisites.
1185 This checks whether the cluster is empty.
1187 Any errors are signaled by raising errors.OpPrereqError.
1190 master = self.cfg.GetMasterNode()
1192 nodelist = self.cfg.GetNodeList()
1193 if len(nodelist) != 1 or nodelist[0] != master:
1194 raise errors.OpPrereqError("There are still %d node(s) in"
1195 " this cluster." % (len(nodelist) - 1),
1197 instancelist = self.cfg.GetInstanceList()
1199 raise errors.OpPrereqError("There are still %d instance(s) in"
1200 " this cluster." % len(instancelist),
1203 def Exec(self, feedback_fn):
1204 """Destroys the cluster.
1207 master = self.cfg.GetMasterNode()
1208 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1210 # Run post hooks on master node before it's removed
1211 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1213 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1215 # pylint: disable-msg=W0702
1216 self.LogWarning("Errors occurred running hooks on %s" % master)
1218 result = self.rpc.call_node_stop_master(master, False)
1219 result.Raise("Could not disable the master role")
1221 if modify_ssh_setup:
1222 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1223 utils.CreateBackup(priv_key)
1224 utils.CreateBackup(pub_key)
1229 def _VerifyCertificate(filename):
1230 """Verifies a certificate for LUVerifyCluster.
1232 @type filename: string
1233 @param filename: Path to PEM file
1237 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1238 utils.ReadFile(filename))
1239 except Exception, err: # pylint: disable-msg=W0703
1240 return (LUVerifyCluster.ETYPE_ERROR,
1241 "Failed to load X509 certificate %s: %s" % (filename, err))
1244 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1245 constants.SSL_CERT_EXPIRATION_ERROR)
1248 fnamemsg = "While verifying %s: %s" % (filename, msg)
1253 return (None, fnamemsg)
1254 elif errcode == utils.CERT_WARNING:
1255 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1256 elif errcode == utils.CERT_ERROR:
1257 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1259 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1262 class LUVerifyCluster(LogicalUnit):
1263 """Verifies the cluster status.
1266 HPATH = "cluster-verify"
1267 HTYPE = constants.HTYPE_CLUSTER
1269 ("skip_checks", _EmptyList,
1270 _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1271 ("verbose", False, _TBool),
1272 ("error_codes", False, _TBool),
1273 ("debug_simulate_errors", False, _TBool),
1277 TCLUSTER = "cluster"
1279 TINSTANCE = "instance"
1281 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1282 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1283 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1284 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1285 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1286 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1288 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1289 ENODEDRBD = (TNODE, "ENODEDRBD")
1290 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1291 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1292 ENODEHV = (TNODE, "ENODEHV")
1293 ENODELVM = (TNODE, "ENODELVM")
1294 ENODEN1 = (TNODE, "ENODEN1")
1295 ENODENET = (TNODE, "ENODENET")
1296 ENODEOS = (TNODE, "ENODEOS")
1297 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1298 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1299 ENODERPC = (TNODE, "ENODERPC")
1300 ENODESSH = (TNODE, "ENODESSH")
1301 ENODEVERSION = (TNODE, "ENODEVERSION")
1302 ENODESETUP = (TNODE, "ENODESETUP")
1303 ENODETIME = (TNODE, "ENODETIME")
1305 ETYPE_FIELD = "code"
1306 ETYPE_ERROR = "ERROR"
1307 ETYPE_WARNING = "WARNING"
1309 class NodeImage(object):
1310 """A class representing the logical and physical status of a node.
1313 @ivar name: the node name to which this object refers
1314 @ivar volumes: a structure as returned from
1315 L{ganeti.backend.GetVolumeList} (runtime)
1316 @ivar instances: a list of running instances (runtime)
1317 @ivar pinst: list of configured primary instances (config)
1318 @ivar sinst: list of configured secondary instances (config)
1319 @ivar sbp: dict of {primary-node: list of instances} for all instances
1320 for which this node is secondary (config)
1321 @ivar mfree: free memory, as reported by hypervisor (runtime)
1322 @ivar dfree: free disk, as reported by the node (runtime)
1323 @ivar offline: the offline status (config)
1324 @type rpc_fail: boolean
1325 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1326 not whether the individual keys were correct) (runtime)
1327 @type lvm_fail: boolean
1328 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1329 @type hyp_fail: boolean
1330 @ivar hyp_fail: whether the RPC call didn't return the instance list
1331 @type ghost: boolean
1332 @ivar ghost: whether this is a known node or not (config)
1333 @type os_fail: boolean
1334 @ivar os_fail: whether the RPC call didn't return valid OS data
1336 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1339 def __init__(self, offline=False, name=None):
1348 self.offline = offline
1349 self.rpc_fail = False
1350 self.lvm_fail = False
1351 self.hyp_fail = False
1353 self.os_fail = False
1356 def ExpandNames(self):
1357 self.needed_locks = {
1358 locking.LEVEL_NODE: locking.ALL_SET,
1359 locking.LEVEL_INSTANCE: locking.ALL_SET,
1361 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1363 def _Error(self, ecode, item, msg, *args, **kwargs):
1364 """Format an error message.
1366 Based on the opcode's error_codes parameter, either format a
1367 parseable error code, or a simpler error string.
1369 This must be called only from Exec and functions called from Exec.
1372 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1374 # first complete the msg
1377 # then format the whole message
1378 if self.op.error_codes:
1379 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1385 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1386 # and finally report it via the feedback_fn
1387 self._feedback_fn(" - %s" % msg)
1389 def _ErrorIf(self, cond, *args, **kwargs):
1390 """Log an error message if the passed condition is True.
1393 cond = bool(cond) or self.op.debug_simulate_errors
1395 self._Error(*args, **kwargs)
1396 # do not mark the operation as failed for WARN cases only
1397 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1398 self.bad = self.bad or cond
1400 def _VerifyNode(self, ninfo, nresult):
1401 """Run multiple tests against a node.
1405 - compares ganeti version
1406 - checks vg existence and size > 20G
1407 - checks config file checksum
1408 - checks ssh to other nodes
1410 @type ninfo: L{objects.Node}
1411 @param ninfo: the node to check
1412 @param nresult: the results from the node
1414 @return: whether overall this call was successful (and we can expect
1415 reasonable values in the response)
1419 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1421 # main result, nresult should be a non-empty dict
1422 test = not nresult or not isinstance(nresult, dict)
1423 _ErrorIf(test, self.ENODERPC, node,
1424 "unable to verify node: no data returned")
1428 # compares ganeti version
1429 local_version = constants.PROTOCOL_VERSION
1430 remote_version = nresult.get("version", None)
1431 test = not (remote_version and
1432 isinstance(remote_version, (list, tuple)) and
1433 len(remote_version) == 2)
1434 _ErrorIf(test, self.ENODERPC, node,
1435 "connection to node returned invalid data")
1439 test = local_version != remote_version[0]
1440 _ErrorIf(test, self.ENODEVERSION, node,
1441 "incompatible protocol versions: master %s,"
1442 " node %s", local_version, remote_version[0])
1446 # node seems compatible, we can actually try to look into its results
1448 # full package version
1449 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1450 self.ENODEVERSION, node,
1451 "software version mismatch: master %s, node %s",
1452 constants.RELEASE_VERSION, remote_version[1],
1453 code=self.ETYPE_WARNING)
1455 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1456 if isinstance(hyp_result, dict):
1457 for hv_name, hv_result in hyp_result.iteritems():
1458 test = hv_result is not None
1459 _ErrorIf(test, self.ENODEHV, node,
1460 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1463 test = nresult.get(constants.NV_NODESETUP,
1464 ["Missing NODESETUP results"])
1465 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1470 def _VerifyNodeTime(self, ninfo, nresult,
1471 nvinfo_starttime, nvinfo_endtime):
1472 """Check the node time.
1474 @type ninfo: L{objects.Node}
1475 @param ninfo: the node to check
1476 @param nresult: the remote results for the node
1477 @param nvinfo_starttime: the start time of the RPC call
1478 @param nvinfo_endtime: the end time of the RPC call
1482 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1484 ntime = nresult.get(constants.NV_TIME, None)
1486 ntime_merged = utils.MergeTime(ntime)
1487 except (ValueError, TypeError):
1488 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1491 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1492 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1493 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1494 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1498 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1499 "Node time diverges by at least %s from master node time",
1502 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1503 """Check the node time.
1505 @type ninfo: L{objects.Node}
1506 @param ninfo: the node to check
1507 @param nresult: the remote results for the node
1508 @param vg_name: the configured VG name
1515 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1517 # checks vg existence and size > 20G
1518 vglist = nresult.get(constants.NV_VGLIST, None)
1520 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1522 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1523 constants.MIN_VG_SIZE)
1524 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1527 pvlist = nresult.get(constants.NV_PVLIST, None)
1528 test = pvlist is None
1529 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1531 # check that ':' is not present in PV names, since it's a
1532 # special character for lvcreate (denotes the range of PEs to
1534 for _, pvname, owner_vg in pvlist:
1535 test = ":" in pvname
1536 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1537 " '%s' of VG '%s'", pvname, owner_vg)
1539 def _VerifyNodeNetwork(self, ninfo, nresult):
1540 """Check the node time.
1542 @type ninfo: L{objects.Node}
1543 @param ninfo: the node to check
1544 @param nresult: the remote results for the node
1548 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1550 test = constants.NV_NODELIST not in nresult
1551 _ErrorIf(test, self.ENODESSH, node,
1552 "node hasn't returned node ssh connectivity data")
1554 if nresult[constants.NV_NODELIST]:
1555 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1556 _ErrorIf(True, self.ENODESSH, node,
1557 "ssh communication with node '%s': %s", a_node, a_msg)
1559 test = constants.NV_NODENETTEST not in nresult
1560 _ErrorIf(test, self.ENODENET, node,
1561 "node hasn't returned node tcp connectivity data")
1563 if nresult[constants.NV_NODENETTEST]:
1564 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1566 _ErrorIf(True, self.ENODENET, node,
1567 "tcp communication with node '%s': %s",
1568 anode, nresult[constants.NV_NODENETTEST][anode])
1570 test = constants.NV_MASTERIP not in nresult
1571 _ErrorIf(test, self.ENODENET, node,
1572 "node hasn't returned node master IP reachability data")
1574 if not nresult[constants.NV_MASTERIP]:
1575 if node == self.master_node:
1576 msg = "the master node cannot reach the master IP (not configured?)"
1578 msg = "cannot reach the master IP"
1579 _ErrorIf(True, self.ENODENET, node, msg)
1582 def _VerifyInstance(self, instance, instanceconfig, node_image):
1583 """Verify an instance.
1585 This function checks to see if the required block devices are
1586 available on the instance's node.
1589 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1590 node_current = instanceconfig.primary_node
1592 node_vol_should = {}
1593 instanceconfig.MapLVsByNode(node_vol_should)
1595 for node in node_vol_should:
1596 n_img = node_image[node]
1597 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1598 # ignore missing volumes on offline or broken nodes
1600 for volume in node_vol_should[node]:
1601 test = volume not in n_img.volumes
1602 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1603 "volume %s missing on node %s", volume, node)
1605 if instanceconfig.admin_up:
1606 pri_img = node_image[node_current]
1607 test = instance not in pri_img.instances and not pri_img.offline
1608 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1609 "instance not running on its primary node %s",
1612 for node, n_img in node_image.items():
1613 if node != node_current:
1614 test = instance in n_img.instances
1615 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1616 "instance should not run on node %s", node)
1618 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1619 """Verify if there are any unknown volumes in the cluster.
1621 The .os, .swap and backup volumes are ignored. All other volumes are
1622 reported as unknown.
1625 for node, n_img in node_image.items():
1626 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1627 # skip non-healthy nodes
1629 for volume in n_img.volumes:
1630 test = (node not in node_vol_should or
1631 volume not in node_vol_should[node])
1632 self._ErrorIf(test, self.ENODEORPHANLV, node,
1633 "volume %s is unknown", volume)
1635 def _VerifyOrphanInstances(self, instancelist, node_image):
1636 """Verify the list of running instances.
1638 This checks what instances are running but unknown to the cluster.
1641 for node, n_img in node_image.items():
1642 for o_inst in n_img.instances:
1643 test = o_inst not in instancelist
1644 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1645 "instance %s on node %s should not exist", o_inst, node)
1647 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1648 """Verify N+1 Memory Resilience.
1650 Check that if one single node dies we can still start all the
1651 instances it was primary for.
1654 for node, n_img in node_image.items():
1655 # This code checks that every node which is now listed as
1656 # secondary has enough memory to host all instances it is
1657 # supposed to should a single other node in the cluster fail.
1658 # FIXME: not ready for failover to an arbitrary node
1659 # FIXME: does not support file-backed instances
1660 # WARNING: we currently take into account down instances as well
1661 # as up ones, considering that even if they're down someone
1662 # might want to start them even in the event of a node failure.
1663 for prinode, instances in n_img.sbp.items():
1665 for instance in instances:
1666 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1667 if bep[constants.BE_AUTO_BALANCE]:
1668 needed_mem += bep[constants.BE_MEMORY]
1669 test = n_img.mfree < needed_mem
1670 self._ErrorIf(test, self.ENODEN1, node,
1671 "not enough memory on to accommodate"
1672 " failovers should peer node %s fail", prinode)
1674 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1676 """Verifies and computes the node required file checksums.
1678 @type ninfo: L{objects.Node}
1679 @param ninfo: the node to check
1680 @param nresult: the remote results for the node
1681 @param file_list: required list of files
1682 @param local_cksum: dictionary of local files and their checksums
1683 @param master_files: list of files that only masters should have
1687 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1689 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1690 test = not isinstance(remote_cksum, dict)
1691 _ErrorIf(test, self.ENODEFILECHECK, node,
1692 "node hasn't returned file checksum data")
1696 for file_name in file_list:
1697 node_is_mc = ninfo.master_candidate
1698 must_have = (file_name not in master_files) or node_is_mc
1700 test1 = file_name not in remote_cksum
1702 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1704 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1705 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1706 "file '%s' missing", file_name)
1707 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1708 "file '%s' has wrong checksum", file_name)
1709 # not candidate and this is not a must-have file
1710 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1711 "file '%s' should not exist on non master"
1712 " candidates (and the file is outdated)", file_name)
1713 # all good, except non-master/non-must have combination
1714 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1715 "file '%s' should not exist"
1716 " on non master candidates", file_name)
1718 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1719 """Verifies and the node DRBD status.
1721 @type ninfo: L{objects.Node}
1722 @param ninfo: the node to check
1723 @param nresult: the remote results for the node
1724 @param instanceinfo: the dict of instances
1725 @param drbd_map: the DRBD map as returned by
1726 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1730 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1732 # compute the DRBD minors
1734 for minor, instance in drbd_map[node].items():
1735 test = instance not in instanceinfo
1736 _ErrorIf(test, self.ECLUSTERCFG, None,
1737 "ghost instance '%s' in temporary DRBD map", instance)
1738 # ghost instance should not be running, but otherwise we
1739 # don't give double warnings (both ghost instance and
1740 # unallocated minor in use)
1742 node_drbd[minor] = (instance, False)
1744 instance = instanceinfo[instance]
1745 node_drbd[minor] = (instance.name, instance.admin_up)
1747 # and now check them
1748 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1749 test = not isinstance(used_minors, (tuple, list))
1750 _ErrorIf(test, self.ENODEDRBD, node,
1751 "cannot parse drbd status file: %s", str(used_minors))
1753 # we cannot check drbd status
1756 for minor, (iname, must_exist) in node_drbd.items():
1757 test = minor not in used_minors and must_exist
1758 _ErrorIf(test, self.ENODEDRBD, node,
1759 "drbd minor %d of instance %s is not active", minor, iname)
1760 for minor in used_minors:
1761 test = minor not in node_drbd
1762 _ErrorIf(test, self.ENODEDRBD, node,
1763 "unallocated drbd minor %d is in use", minor)
1765 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1766 """Builds the node OS structures.
1768 @type ninfo: L{objects.Node}
1769 @param ninfo: the node to check
1770 @param nresult: the remote results for the node
1771 @param nimg: the node image object
1775 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1777 remote_os = nresult.get(constants.NV_OSLIST, None)
1778 test = (not isinstance(remote_os, list) or
1779 not compat.all(isinstance(v, list) and len(v) == 7
1780 for v in remote_os))
1782 _ErrorIf(test, self.ENODEOS, node,
1783 "node hasn't returned valid OS data")
1792 for (name, os_path, status, diagnose,
1793 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1795 if name not in os_dict:
1798 # parameters is a list of lists instead of list of tuples due to
1799 # JSON lacking a real tuple type, fix it:
1800 parameters = [tuple(v) for v in parameters]
1801 os_dict[name].append((os_path, status, diagnose,
1802 set(variants), set(parameters), set(api_ver)))
1804 nimg.oslist = os_dict
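# Illustrative sketch (the OS name and path are made up): after _UpdateNodeOS,
# nimg.oslist maps each OS name to a list of findings, one per path where the
# OS was found, e.g.:
#   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                     set(["default"]), set(), set([10, 15]))]}
# i.e. (path, status, diagnose, variants, parameters, api_versions); more than
# one entry for a name means a shadowed OS, which _VerifyNodeOS reports.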
1806 def _VerifyNodeOS(self, ninfo, nimg, base):
1807 """Verifies the node OS list.
1809 @type ninfo: L{objects.Node}
1810 @param ninfo: the node to check
1811 @param nimg: the node image object
1812 @param base: the 'template' node we match against (e.g. from the master)
1816 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1818 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1820 for os_name, os_data in nimg.oslist.items():
1821 assert os_data, "Empty OS status for OS %s?!" % os_name
1822 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1823 _ErrorIf(not f_status, self.ENODEOS, node,
1824 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1825 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1826 "OS '%s' has multiple entries (first one shadows the rest): %s",
1827 os_name, utils.CommaJoin([v[0] for v in os_data]))
1828 # this will be caught in backend too
1829 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1830 and not f_var, self.ENODEOS, node,
1831 "OS %s with API at least %d does not declare any variant",
1832 os_name, constants.OS_API_V15)
1833 # comparisons with the 'base' image
1834 test = os_name not in base.oslist
1835 _ErrorIf(test, self.ENODEOS, node,
1836 "Extra OS %s not present on reference node (%s)",
1840 assert base.oslist[os_name], "Base node has empty OS status?"
1841 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1843 # base OS is invalid, skipping
1845 for kind, a, b in [("API version", f_api, b_api),
1846 ("variants list", f_var, b_var),
1847 ("parameters", f_param, b_param)]:
1848 _ErrorIf(a != b, self.ENODEOS, node,
1849 "OS %s %s differs from reference node %s: %s vs. %s",
1850 kind, os_name, base.name,
1851 utils.CommaJoin(a), utils.CommaJoin(b))
1853 # check any missing OSes
1854 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1855 _ErrorIf(missing, self.ENODEOS, node,
1856 "OSes present on reference node %s but missing on this node: %s",
1857 base.name, utils.CommaJoin(missing))
1859 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1860 """Verifies and updates the node volume data.
1862 This function will update a L{NodeImage}'s internal structures
1863 with data from the remote call.
1865 @type ninfo: L{objects.Node}
1866 @param ninfo: the node to check
1867 @param nresult: the remote results for the node
1868 @param nimg: the node image object
1869 @param vg_name: the configured VG name
1873 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1875 nimg.lvm_fail = True
1876 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1879 elif isinstance(lvdata, basestring):
1880 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1881 utils.SafeEncode(lvdata))
1882 elif not isinstance(lvdata, dict):
1883 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1885 nimg.volumes = lvdata
1886 nimg.lvm_fail = False
1888 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1889 """Verifies and updates the node instance list.
1891 If the listing was successful, then updates this node's instance
1892 list. Otherwise, it marks the RPC call as failed for the instance
1895 @type ninfo: L{objects.Node}
1896 @param ninfo: the node to check
1897 @param nresult: the remote results for the node
1898 @param nimg: the node image object
1901 idata = nresult.get(constants.NV_INSTANCELIST, None)
1902 test = not isinstance(idata, list)
1903 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1904 " (instancelist): %s", utils.SafeEncode(str(idata)))
1906 nimg.hyp_fail = True
1908 nimg.instances = idata
1910 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1911 """Verifies and computes a node information map
1913 @type ninfo: L{objects.Node}
1914 @param ninfo: the node to check
1915 @param nresult: the remote results for the node
1916 @param nimg: the node image object
1917 @param vg_name: the configured VG name
1921 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1923 # try to read free memory (from the hypervisor)
1924 hv_info = nresult.get(constants.NV_HVINFO, None)
1925 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1926 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1929 nimg.mfree = int(hv_info["memory_free"])
1930 except (ValueError, TypeError):
1931 _ErrorIf(True, self.ENODERPC, node,
1932 "node returned invalid nodeinfo, check hypervisor")
1934 # FIXME: devise a free space model for file based instances as well
1935 if vg_name is not None:
1936 test = (constants.NV_VGLIST not in nresult or
1937 vg_name not in nresult[constants.NV_VGLIST])
1938 _ErrorIf(test, self.ENODELVM, node,
1939 "node didn't return data for the volume group '%s'"
1940 " - it is either missing or broken", vg_name)
1943 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1944 except (ValueError, TypeError):
1945 _ErrorIf(True, self.ENODERPC, node,
1946 "node returned invalid LVM info, check LVM status")
1948 def BuildHooksEnv(self):
1951 Cluster-Verify hooks are run only in the post phase; if they fail, their
1952 output is logged in the verify output and the verification fails.
1955 all_nodes = self.cfg.GetNodeList()
1957 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1959 for node in self.cfg.GetAllNodesInfo().values():
1960 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1962 return env, [], all_nodes
1964 def Exec(self, feedback_fn):
1965 """Verify integrity of cluster, performing various test on nodes.
1969 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1970 verbose = self.op.verbose
1971 self._feedback_fn = feedback_fn
1972 feedback_fn("* Verifying global settings")
1973 for msg in self.cfg.VerifyConfig():
1974 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1976 # Check the cluster certificates
1977 for cert_filename in constants.ALL_CERT_FILES:
1978 (errcode, msg) = _VerifyCertificate(cert_filename)
1979 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1981 vg_name = self.cfg.GetVGName()
1982 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1983 cluster = self.cfg.GetClusterInfo()
1984 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1985 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1986 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1987 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1988 for iname in instancelist)
1989 i_non_redundant = [] # Non redundant instances
1990 i_non_a_balanced = [] # Non auto-balanced instances
1991 n_offline = 0 # Count of offline nodes
1992 n_drained = 0 # Count of nodes being drained
1993 node_vol_should = {}
1995 # FIXME: verify OS list
1996 # do local checksums
1997 master_files = [constants.CLUSTER_CONF_FILE]
1998 master_node = self.master_node = self.cfg.GetMasterNode()
1999 master_ip = self.cfg.GetMasterIP()
2001 file_names = ssconf.SimpleStore().GetFileList()
2002 file_names.extend(constants.ALL_CERT_FILES)
2003 file_names.extend(master_files)
2004 if cluster.modify_etc_hosts:
2005 file_names.append(constants.ETC_HOSTS)
2007 local_checksums = utils.FingerprintFiles(file_names)
2009 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2010 node_verify_param = {
2011 constants.NV_FILELIST: file_names,
2012 constants.NV_NODELIST: [node.name for node in nodeinfo
2013 if not node.offline],
2014 constants.NV_HYPERVISOR: hypervisors,
2015 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2016 node.secondary_ip) for node in nodeinfo
2017 if not node.offline],
2018 constants.NV_INSTANCELIST: hypervisors,
2019 constants.NV_VERSION: None,
2020 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2021 constants.NV_NODESETUP: None,
2022 constants.NV_TIME: None,
2023 constants.NV_MASTERIP: (master_node, master_ip),
2024 constants.NV_OSLIST: None,
2027 if vg_name is not None:
2028 node_verify_param[constants.NV_VGLIST] = None
2029 node_verify_param[constants.NV_LVLIST] = vg_name
2030 node_verify_param[constants.NV_PVLIST] = [vg_name]
2031 node_verify_param[constants.NV_DRBDLIST] = None
2033 # Build our expected cluster state
2034 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2036 for node in nodeinfo)
2038 for instance in instancelist:
2039 inst_config = instanceinfo[instance]
2041 for nname in inst_config.all_nodes:
2042 if nname not in node_image:
2044 gnode = self.NodeImage(name=nname)
2046 node_image[nname] = gnode
2048 inst_config.MapLVsByNode(node_vol_should)
2050 pnode = inst_config.primary_node
2051 node_image[pnode].pinst.append(instance)
2053 for snode in inst_config.secondary_nodes:
2054 nimg = node_image[snode]
2055 nimg.sinst.append(instance)
2056 if pnode not in nimg.sbp:
2057 nimg.sbp[pnode] = []
2058 nimg.sbp[pnode].append(instance)
2060 # At this point, we have the in-memory data structures complete,
2061 # except for the runtime information, which we'll gather next
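# Illustrative shape of the structures built above (hypothetical names):
#   node_image["node1"].pinst == ["inst1"]            # instances with node1 as primary
#   node_image["node2"].sinst == ["inst1"]            # instances with node2 as secondary
#   node_image["node2"].sbp  == {"node1": ["inst1"]}  # secondaries grouped by primary node
# node_vol_should maps each node name to the list of LVs expected there.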
2063 # Due to the way our RPC system works, exact response times cannot be
2064 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2065 # time before and after executing the request, we can at least have a time
2067 nvinfo_starttime = time.time()
2068 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2069 self.cfg.GetClusterName())
2070 nvinfo_endtime = time.time()
2072 all_drbd_map = self.cfg.ComputeDRBDMap()
2074 feedback_fn("* Verifying node status")
2078 for node_i in nodeinfo:
2080 nimg = node_image[node]
2084 feedback_fn("* Skipping offline node %s" % (node,))
2088 if node == master_node:
2090 elif node_i.master_candidate:
2091 ntype = "master candidate"
2092 elif node_i.drained:
2098 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2100 msg = all_nvinfo[node].fail_msg
2101 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2103 nimg.rpc_fail = True
2106 nresult = all_nvinfo[node].payload
2108 nimg.call_ok = self._VerifyNode(node_i, nresult)
2109 self._VerifyNodeNetwork(node_i, nresult)
2110 self._VerifyNodeLVM(node_i, nresult, vg_name)
2111 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2113 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
2114 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2116 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2117 self._UpdateNodeInstances(node_i, nresult, nimg)
2118 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2119 self._UpdateNodeOS(node_i, nresult, nimg)
2120 if not nimg.os_fail:
2121 if refos_img is None:
2123 self._VerifyNodeOS(node_i, nimg, refos_img)
2125 feedback_fn("* Verifying instance status")
2126 for instance in instancelist:
2128 feedback_fn("* Verifying instance %s" % instance)
2129 inst_config = instanceinfo[instance]
2130 self._VerifyInstance(instance, inst_config, node_image)
2131 inst_nodes_offline = []
2133 pnode = inst_config.primary_node
2134 pnode_img = node_image[pnode]
2135 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2136 self.ENODERPC, pnode, "instance %s, connection to"
2137 " primary node failed", instance)
2139 if pnode_img.offline:
2140 inst_nodes_offline.append(pnode)
2142 # If the instance is non-redundant we cannot survive losing its primary
2143 # node, so we are not N+1 compliant. On the other hand we have no disk
2144 # templates with more than one secondary so that situation is not well
2146 # FIXME: does not support file-backed instances
2147 if not inst_config.secondary_nodes:
2148 i_non_redundant.append(instance)
2149 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2150 instance, "instance has multiple secondary nodes: %s",
2151 utils.CommaJoin(inst_config.secondary_nodes),
2152 code=self.ETYPE_WARNING)
2154 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2155 i_non_a_balanced.append(instance)
2157 for snode in inst_config.secondary_nodes:
2158 s_img = node_image[snode]
2159 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2160 "instance %s, connection to secondary node failed", instance)
2163 inst_nodes_offline.append(snode)
2165 # warn that the instance lives on offline nodes
2166 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2167 "instance lives on offline node(s) %s",
2168 utils.CommaJoin(inst_nodes_offline))
2169 # ... or ghost nodes
2170 for node in inst_config.all_nodes:
2171 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2172 "instance lives on ghost node %s", node)
2174 feedback_fn("* Verifying orphan volumes")
2175 self._VerifyOrphanVolumes(node_vol_should, node_image)
2177 feedback_fn("* Verifying orphan instances")
2178 self._VerifyOrphanInstances(instancelist, node_image)
2180 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2181 feedback_fn("* Verifying N+1 Memory redundancy")
2182 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2184 feedback_fn("* Other Notes")
2186 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2187 % len(i_non_redundant))
2189 if i_non_a_balanced:
2190 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2191 % len(i_non_a_balanced))
2194 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2197 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2201 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2202 """Analyze the post-hooks' result
2204 This method analyses the hook result, handles it, and sends some
2205 nicely-formatted feedback back to the user.
2207 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2208 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2209 @param hooks_results: the results of the multi-node hooks rpc call
2210 @param feedback_fn: function used to send feedback back to the caller
2211 @param lu_result: previous Exec result
2212 @return: the new Exec result, based on the previous result
2216 # We only really run POST phase hooks, and are only interested in
2218 if phase == constants.HOOKS_PHASE_POST:
2219 # Used to change hooks' output to proper indentation
2220 indent_re = re.compile('^', re.M)
2221 feedback_fn("* Hooks Results")
2222 assert hooks_results, "invalid result from hooks"
2224 for node_name in hooks_results:
2225 res = hooks_results[node_name]
2227 test = msg and not res.offline
2228 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2229 "Communication failure in hooks execution: %s", msg)
2230 if res.offline or msg:
2231 # No need to investigate payload if node is offline or gave an error.
2232 # override manually lu_result here as _ErrorIf only
2233 # overrides self.bad
2236 for script, hkr, output in res.payload:
2237 test = hkr == constants.HKR_FAIL
2238 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2239 "Script %s failed, output:", script)
2241 output = indent_re.sub(' ', output)
2242 feedback_fn("%s" % output)
2248 class LUVerifyDisks(NoHooksLU):
2249 """Verifies the cluster disks status.
2254 def ExpandNames(self):
2255 self.needed_locks = {
2256 locking.LEVEL_NODE: locking.ALL_SET,
2257 locking.LEVEL_INSTANCE: locking.ALL_SET,
2259 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2261 def Exec(self, feedback_fn):
2262 """Verify integrity of cluster disks.
2264 @rtype: tuple of three items
2265 @return: a tuple of (dict of node-to-node_error, list of instances
2266 which need activate-disks, dict of instance: (node, volume) for
2270 result = res_nodes, res_instances, res_missing = {}, [], {}
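# For illustration (hypothetical names), the result may end up as:
#   res_nodes:     {"node1.example.com": "rpc error message"}
#   res_instances: ["instance1.example.com"]
#   res_missing:   {"instance2.example.com": [("node1.example.com", "xenvg/lv0")]}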
2272 vg_name = self.cfg.GetVGName()
2273 nodes = utils.NiceSort(self.cfg.GetNodeList())
2274 instances = [self.cfg.GetInstanceInfo(name)
2275 for name in self.cfg.GetInstanceList()]
2278 for inst in instances:
2280 if (not inst.admin_up or
2281 inst.disk_template not in constants.DTS_NET_MIRROR):
2283 inst.MapLVsByNode(inst_lvs)
2284 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2285 for node, vol_list in inst_lvs.iteritems():
2286 for vol in vol_list:
2287 nv_dict[(node, vol)] = inst
2292 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2296 node_res = node_lvs[node]
2297 if node_res.offline:
2299 msg = node_res.fail_msg
2301 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2302 res_nodes[node] = msg
2305 lvs = node_res.payload
2306 for lv_name, (_, _, lv_online) in lvs.items():
2307 inst = nv_dict.pop((node, lv_name), None)
2308 if (not lv_online and inst is not None
2309 and inst.name not in res_instances):
2310 res_instances.append(inst.name)
2312 # any leftover items in nv_dict are missing LVs, let's arrange the
2314 for key, inst in nv_dict.iteritems():
2315 if inst.name not in res_missing:
2316 res_missing[inst.name] = []
2317 res_missing[inst.name].append(key)
2322 class LURepairDiskSizes(NoHooksLU):
2323 """Verifies the cluster disks sizes.
2326 _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2329 def ExpandNames(self):
2330 if self.op.instances:
2331 self.wanted_names = []
2332 for name in self.op.instances:
2333 full_name = _ExpandInstanceName(self.cfg, name)
2334 self.wanted_names.append(full_name)
2335 self.needed_locks = {
2336 locking.LEVEL_NODE: [],
2337 locking.LEVEL_INSTANCE: self.wanted_names,
2339 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2341 self.wanted_names = None
2342 self.needed_locks = {
2343 locking.LEVEL_NODE: locking.ALL_SET,
2344 locking.LEVEL_INSTANCE: locking.ALL_SET,
2346 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2348 def DeclareLocks(self, level):
2349 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2350 self._LockInstancesNodes(primary_only=True)
2352 def CheckPrereq(self):
2353 """Check prerequisites.
2355 This only checks the optional instance list against the existing names.
2358 if self.wanted_names is None:
2359 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2361 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2362 in self.wanted_names]
2364 def _EnsureChildSizes(self, disk):
2365 """Ensure children of the disk have the needed disk size.
2367 This is valid mainly for DRBD8 and fixes an issue where the
2368 children have smaller disk size.
2370 @param disk: an L{ganeti.objects.Disk} object
2373 if disk.dev_type == constants.LD_DRBD8:
2374 assert disk.children, "Empty children for DRBD8?"
2375 fchild = disk.children[0]
2376 mismatch = fchild.size < disk.size
2378 self.LogInfo("Child disk has size %d, parent %d, fixing",
2379 fchild.size, disk.size)
2380 fchild.size = disk.size
2382 # and we recurse on this child only, not on the metadev
2383 return self._EnsureChildSizes(fchild) or mismatch
2387 def Exec(self, feedback_fn):
2388 """Verify the size of cluster disks.
2391 # TODO: check child disks too
2392 # TODO: check differences in size between primary/secondary nodes
2394 for instance in self.wanted_instances:
2395 pnode = instance.primary_node
2396 if pnode not in per_node_disks:
2397 per_node_disks[pnode] = []
2398 for idx, disk in enumerate(instance.disks):
2399 per_node_disks[pnode].append((instance, idx, disk))
2402 for node, dskl in per_node_disks.items():
2403 newl = [v[2].Copy() for v in dskl]
2405 self.cfg.SetDiskID(dsk, node)
2406 result = self.rpc.call_blockdev_getsizes(node, newl)
2408 self.LogWarning("Failure in blockdev_getsizes call to node"
2409 " %s, ignoring", node)
2411 if len(result.data) != len(dskl):
2412 self.LogWarning("Invalid result from node %s, ignoring node results",
2415 for ((instance, idx, disk), size) in zip(dskl, result.data):
2417 self.LogWarning("Disk %d of instance %s did not return size"
2418 " information, ignoring", idx, instance.name)
2420 if not isinstance(size, (int, long)):
2421 self.LogWarning("Disk %d of instance %s did not return valid"
2422 " size information, ignoring", idx, instance.name)
2425 if size != disk.size:
2426 self.LogInfo("Disk %d of instance %s has mismatched size,"
2427 " correcting: recorded %d, actual %d", idx,
2428 instance.name, disk.size, size)
2430 self.cfg.Update(instance, feedback_fn)
2431 changed.append((instance.name, idx, size))
2432 if self._EnsureChildSizes(disk):
2433 self.cfg.Update(instance, feedback_fn)
2434 changed.append((instance.name, idx, disk.size))
2438 class LURenameCluster(LogicalUnit):
2439 """Rename the cluster.
2442 HPATH = "cluster-rename"
2443 HTYPE = constants.HTYPE_CLUSTER
2444 _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2446 def BuildHooksEnv(self):
2451 "OP_TARGET": self.cfg.GetClusterName(),
2452 "NEW_NAME": self.op.name,
2454 mn = self.cfg.GetMasterNode()
2455 all_nodes = self.cfg.GetNodeList()
2456 return env, [mn], all_nodes
2458 def CheckPrereq(self):
2459 """Verify that the passed name is a valid one.
2462 hostname = utils.GetHostInfo(self.op.name)
2464 new_name = hostname.name
2465 self.ip = new_ip = hostname.ip
2466 old_name = self.cfg.GetClusterName()
2467 old_ip = self.cfg.GetMasterIP()
2468 if new_name == old_name and new_ip == old_ip:
2469 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2470 " cluster has changed",
2472 if new_ip != old_ip:
2473 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2474 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2475 " reachable on the network. Aborting." %
2476 new_ip, errors.ECODE_NOTUNIQUE)
2478 self.op.name = new_name
2480 def Exec(self, feedback_fn):
2481 """Rename the cluster.
2484 clustername = self.op.name
2487 # shutdown the master IP
2488 master = self.cfg.GetMasterNode()
2489 result = self.rpc.call_node_stop_master(master, False)
2490 result.Raise("Could not disable the master role")
2493 cluster = self.cfg.GetClusterInfo()
2494 cluster.cluster_name = clustername
2495 cluster.master_ip = ip
2496 self.cfg.Update(cluster, feedback_fn)
2498 # update the known hosts file
2499 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2500 node_list = self.cfg.GetNodeList()
2502 node_list.remove(master)
2505 result = self.rpc.call_upload_file(node_list,
2506 constants.SSH_KNOWN_HOSTS_FILE)
2507 for to_node, to_result in result.iteritems():
2508 msg = to_result.fail_msg
2510 msg = ("Copy of file %s to node %s failed: %s" %
2511 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2512 self.proc.LogWarning(msg)
2515 result = self.rpc.call_node_start_master(master, False, False)
2516 msg = result.fail_msg
2518 self.LogWarning("Could not re-enable the master role on"
2519 " the master, please restart manually: %s", msg)
2522 def _RecursiveCheckIfLVMBased(disk):
2523 """Check if the given disk or its children are lvm-based.
2525 @type disk: L{objects.Disk}
2526 @param disk: the disk to check
2528 @return: boolean indicating whether a LD_LV dev_type was found or not
2532 for chdisk in disk.children:
2533 if _RecursiveCheckIfLVMBased(chdisk):
2535 return disk.dev_type == constants.LD_LV
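# _RecursiveCheckIfLVMBased is used below by LUSetClusterParams.CheckPrereq to
# refuse disabling LVM storage while LVM-based instance disks still exist.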
2538 class LUSetClusterParams(LogicalUnit):
2539 """Change the parameters of the cluster.
2542 HPATH = "cluster-modify"
2543 HTYPE = constants.HTYPE_CLUSTER
2545 ("vg_name", None, _TMaybeString),
2546 ("enabled_hypervisors", None,
2547 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2548 ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2549 ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2550 ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2551 ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2552 ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2553 ("uid_pool", None, _NoType),
2554 ("add_uids", None, _NoType),
2555 ("remove_uids", None, _NoType),
2556 ("maintain_node_health", None, _TMaybeBool),
2557 ("nicparams", None, _TOr(_TDict, _TNone)),
2561 def CheckArguments(self):
2565 if self.op.uid_pool:
2566 uidpool.CheckUidPool(self.op.uid_pool)
2568 if self.op.add_uids:
2569 uidpool.CheckUidPool(self.op.add_uids)
2571 if self.op.remove_uids:
2572 uidpool.CheckUidPool(self.op.remove_uids)
2574 def ExpandNames(self):
2575 # FIXME: in the future maybe other cluster params won't require checking on
2576 # all nodes to be modified.
2577 self.needed_locks = {
2578 locking.LEVEL_NODE: locking.ALL_SET,
2580 self.share_locks[locking.LEVEL_NODE] = 1
2582 def BuildHooksEnv(self):
2587 "OP_TARGET": self.cfg.GetClusterName(),
2588 "NEW_VG_NAME": self.op.vg_name,
2590 mn = self.cfg.GetMasterNode()
2591 return env, [mn], [mn]
2593 def CheckPrereq(self):
2594 """Check prerequisites.
2596 This checks whether the given params don't conflict and
2597 if the given volume group is valid.
2600 if self.op.vg_name is not None and not self.op.vg_name:
2601 instances = self.cfg.GetAllInstancesInfo().values()
2602 for inst in instances:
2603 for disk in inst.disks:
2604 if _RecursiveCheckIfLVMBased(disk):
2605 raise errors.OpPrereqError("Cannot disable lvm storage while"
2606 " lvm-based instances exist",
2609 node_list = self.acquired_locks[locking.LEVEL_NODE]
2611 # if vg_name not None, checks given volume group on all nodes
2613 vglist = self.rpc.call_vg_list(node_list)
2614 for node in node_list:
2615 msg = vglist[node].fail_msg
2617 # ignoring down node
2618 self.LogWarning("Error while gathering data on node %s"
2619 " (ignoring node): %s", node, msg)
2621 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2623 constants.MIN_VG_SIZE)
2625 raise errors.OpPrereqError("Error on node '%s': %s" %
2626 (node, vgstatus), errors.ECODE_ENVIRON)
2628 self.cluster = cluster = self.cfg.GetClusterInfo()
2629 # validate params changes
2630 if self.op.beparams:
2631 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2632 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2634 if self.op.nicparams:
2635 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2636 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2637 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2640 # check all instances for consistency
2641 for instance in self.cfg.GetAllInstancesInfo().values():
2642 for nic_idx, nic in enumerate(instance.nics):
2643 params_copy = copy.deepcopy(nic.nicparams)
2644 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2646 # check parameter syntax
2648 objects.NIC.CheckParameterSyntax(params_filled)
2649 except errors.ConfigurationError, err:
2650 nic_errors.append("Instance %s, nic/%d: %s" %
2651 (instance.name, nic_idx, err))
2653 # if we're moving instances to routed, check that they have an ip
2654 target_mode = params_filled[constants.NIC_MODE]
2655 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2656 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2657 (instance.name, nic_idx))
2659 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2660 "\n".join(nic_errors))
2662 # hypervisor list/parameters
2663 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2664 if self.op.hvparams:
2665 for hv_name, hv_dict in self.op.hvparams.items():
2666 if hv_name not in self.new_hvparams:
2667 self.new_hvparams[hv_name] = hv_dict
2669 self.new_hvparams[hv_name].update(hv_dict)
2671 # os hypervisor parameters
2672 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2674 for os_name, hvs in self.op.os_hvp.items():
2675 if os_name not in self.new_os_hvp:
2676 self.new_os_hvp[os_name] = hvs
2678 for hv_name, hv_dict in hvs.items():
2679 if hv_name not in self.new_os_hvp[os_name]:
2680 self.new_os_hvp[os_name][hv_name] = hv_dict
2682 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2685 self.new_osp = objects.FillDict(cluster.osparams, {})
2686 if self.op.osparams:
2687 for os_name, osp in self.op.osparams.items():
2688 if os_name not in self.new_osp:
2689 self.new_osp[os_name] = {}
2691 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2694 if not self.new_osp[os_name]:
2695 # we removed all parameters
2696 del self.new_osp[os_name]
2698 # check the parameter validity (remote check)
2699 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2700 os_name, self.new_osp[os_name])
2702 # changes to the hypervisor list
2703 if self.op.enabled_hypervisors is not None:
2704 self.hv_list = self.op.enabled_hypervisors
2705 for hv in self.hv_list:
2706 # if the hypervisor doesn't already exist in the cluster
2707 # hvparams, we initialize it to empty, and then (in both
2708 # cases) we make sure to fill the defaults, as we might not
2709 # have a complete defaults list if the hypervisor wasn't
2711 if hv not in new_hvp:
2713 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2714 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2716 self.hv_list = cluster.enabled_hypervisors
2718 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2719 # either the enabled list has changed, or the parameters have, validate
2720 for hv_name, hv_params in self.new_hvparams.items():
2721 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2722 (self.op.enabled_hypervisors and
2723 hv_name in self.op.enabled_hypervisors)):
2724 # either this is a new hypervisor, or its parameters have changed
2725 hv_class = hypervisor.GetHypervisor(hv_name)
2726 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2727 hv_class.CheckParameterSyntax(hv_params)
2728 _CheckHVParams(self, node_list, hv_name, hv_params)
2731 # no need to check any newly-enabled hypervisors, since the
2732 # defaults have already been checked in the above code-block
2733 for os_name, os_hvp in self.new_os_hvp.items():
2734 for hv_name, hv_params in os_hvp.items():
2735 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2736 # we need to fill in the new os_hvp on top of the actual hv_p
2737 cluster_defaults = self.new_hvparams.get(hv_name, {})
2738 new_osp = objects.FillDict(cluster_defaults, hv_params)
2739 hv_class = hypervisor.GetHypervisor(hv_name)
2740 hv_class.CheckParameterSyntax(new_osp)
2741 _CheckHVParams(self, node_list, hv_name, new_osp)
2744 def Exec(self, feedback_fn):
2745 """Change the parameters of the cluster.
2748 if self.op.vg_name is not None:
2749 new_volume = self.op.vg_name
2752 if new_volume != self.cfg.GetVGName():
2753 self.cfg.SetVGName(new_volume)
2755 feedback_fn("Cluster LVM configuration already in desired"
2756 " state, not changing")
2757 if self.op.hvparams:
2758 self.cluster.hvparams = self.new_hvparams
2760 self.cluster.os_hvp = self.new_os_hvp
2761 if self.op.enabled_hypervisors is not None:
2762 self.cluster.hvparams = self.new_hvparams
2763 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2764 if self.op.beparams:
2765 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2766 if self.op.nicparams:
2767 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2768 if self.op.osparams:
2769 self.cluster.osparams = self.new_osp
2771 if self.op.candidate_pool_size is not None:
2772 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2773 # we need to update the pool size here, otherwise the save will fail
2774 _AdjustCandidatePool(self, [])
2776 if self.op.maintain_node_health is not None:
2777 self.cluster.maintain_node_health = self.op.maintain_node_health
2779 if self.op.add_uids is not None:
2780 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2782 if self.op.remove_uids is not None:
2783 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2785 if self.op.uid_pool is not None:
2786 self.cluster.uid_pool = self.op.uid_pool
2788 self.cfg.Update(self.cluster, feedback_fn)
2791 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2792 """Distribute additional files which are part of the cluster configuration.
2794 ConfigWriter takes care of distributing the config and ssconf files, but
2795 there are more files which should be distributed to all nodes. This function
2796 makes sure those are copied.
2798 @param lu: calling logical unit
2799 @param additional_nodes: list of nodes not in the config to distribute to
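A usage sketch (variable names hypothetical; the additional_nodes form is the
one used when adding a node)::

  _RedistributeAncillaryFiles(lu, additional_nodes=[new_node.name])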
2802 # 1. Gather target nodes
2803 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2804 dist_nodes = lu.cfg.GetOnlineNodeList()
2805 if additional_nodes is not None:
2806 dist_nodes.extend(additional_nodes)
2807 if myself.name in dist_nodes:
2808 dist_nodes.remove(myself.name)
2810 # 2. Gather files to distribute
2811 dist_files = set([constants.ETC_HOSTS,
2812 constants.SSH_KNOWN_HOSTS_FILE,
2813 constants.RAPI_CERT_FILE,
2814 constants.RAPI_USERS_FILE,
2815 constants.CONFD_HMAC_KEY,
2816 constants.CLUSTER_DOMAIN_SECRET_FILE,
2819 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2820 for hv_name in enabled_hypervisors:
2821 hv_class = hypervisor.GetHypervisor(hv_name)
2822 dist_files.update(hv_class.GetAncillaryFiles())
2824 # 3. Perform the files upload
2825 for fname in dist_files:
2826 if os.path.exists(fname):
2827 result = lu.rpc.call_upload_file(dist_nodes, fname)
2828 for to_node, to_result in result.items():
2829 msg = to_result.fail_msg
2831 msg = ("Copy of file %s to node %s failed: %s" %
2832 (fname, to_node, msg))
2833 lu.proc.LogWarning(msg)
2836 class LURedistributeConfig(NoHooksLU):
2837 """Force the redistribution of cluster configuration.
2839 This is a very simple LU.
2844 def ExpandNames(self):
2845 self.needed_locks = {
2846 locking.LEVEL_NODE: locking.ALL_SET,
2848 self.share_locks[locking.LEVEL_NODE] = 1
2850 def Exec(self, feedback_fn):
2851 """Redistribute the configuration.
2854 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2855 _RedistributeAncillaryFiles(self)
2858 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2859 """Sleep and poll for an instance's disk to sync.
2862 if not instance.disks or disks is not None and not disks:
2865 disks = _ExpandCheckDisks(instance, disks)
2868 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2870 node = instance.primary_node
2873 lu.cfg.SetDiskID(dev, node)
2875 # TODO: Convert to utils.Retry
2878 degr_retries = 10 # in seconds, as we sleep 1 second each time
2882 cumul_degraded = False
2883 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2884 msg = rstats.fail_msg
2886 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2889 raise errors.RemoteError("Can't contact node %s for mirror data,"
2890 " aborting." % node)
2893 rstats = rstats.payload
2895 for i, mstat in enumerate(rstats):
2897 lu.LogWarning("Can't compute data for node %s/%s",
2898 node, disks[i].iv_name)
2901 cumul_degraded = (cumul_degraded or
2902 (mstat.is_degraded and mstat.sync_percent is None))
2903 if mstat.sync_percent is not None:
2905 if mstat.estimated_time is not None:
2906 rem_time = ("%s remaining (estimated)" %
2907 utils.FormatSeconds(mstat.estimated_time))
2908 max_time = mstat.estimated_time
2910 rem_time = "no time estimate"
2911 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2912 (disks[i].iv_name, mstat.sync_percent, rem_time))
2914 # if we're done but degraded, let's do a few small retries, to
2915 # make sure we see a stable and not transient situation; therefore
2916 # we force restart of the loop
2917 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2918 logging.info("Degraded disks found, %d retries left", degr_retries)
2926 time.sleep(min(60, max_time))
2929 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2930 return not cumul_degraded
2933 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2934 """Check that mirrors are not degraded.
2936 The ldisk parameter, if True, will change the test from the
2937 is_degraded attribute (which represents overall non-ok status for
2938 the device(s)) to the ldisk (representing the local storage status).
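A hedged usage sketch, not taken verbatim from this module: checking only the
local storage status of a device on a secondary node would look like::

  _CheckDiskConsistency(lu, dev, node, on_primary=False, ldisk=True)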
2941 lu.cfg.SetDiskID(dev, node)
2945 if on_primary or dev.AssembleOnSecondary():
2946 rstats = lu.rpc.call_blockdev_find(node, dev)
2947 msg = rstats.fail_msg
2949 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2951 elif not rstats.payload:
2952 lu.LogWarning("Can't find disk on node %s", node)
2956 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2958 result = result and not rstats.payload.is_degraded
2961 for child in dev.children:
2962 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2967 class LUDiagnoseOS(NoHooksLU):
2968 """Logical unit for OS diagnose/query.
2973 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
2976 _FIELDS_STATIC = utils.FieldSet()
2977 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
2978 "parameters", "api_versions")
2980 def CheckArguments(self):
2982 raise errors.OpPrereqError("Selective OS query not supported",
2985 _CheckOutputFields(static=self._FIELDS_STATIC,
2986 dynamic=self._FIELDS_DYNAMIC,
2987 selected=self.op.output_fields)
2989 def ExpandNames(self):
2990 # Lock all nodes, in shared mode
2991 # Temporary removal of locks, should be reverted later
2992 # TODO: reintroduce locks when they are lighter-weight
2993 self.needed_locks = {}
2994 #self.share_locks[locking.LEVEL_NODE] = 1
2995 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2998 def _DiagnoseByOS(rlist):
2999 """Remaps a per-node return list into an a per-os per-node dictionary
3001 @param rlist: a map with node names as keys and OS objects as values
3004 @return: a dictionary with osnames as keys and as value another
3005 map, with nodes as keys and tuples of (path, status, diagnose,
3006 variants, parameters, api_versions) as values, eg::
3008 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3009 (/srv/..., False, "invalid api")],
3010 "node2": [(/srv/..., True, "", [], [])]}
3015 # we build here the list of nodes that didn't fail the RPC (at RPC
3016 # level), so that nodes with a non-responding node daemon don't
3017 # make all OSes invalid
3018 good_nodes = [node_name for node_name in rlist
3019 if not rlist[node_name].fail_msg]
3020 for node_name, nr in rlist.items():
3021 if nr.fail_msg or not nr.payload:
3023 for (name, path, status, diagnose, variants,
3024 params, api_versions) in nr.payload:
3025 if name not in all_os:
3026 # build a list of nodes for this os containing empty lists
3027 # for each node in node_list
3029 for nname in good_nodes:
3030 all_os[name][nname] = []
3031 # convert params from [name, help] to (name, help)
3032 params = [tuple(v) for v in params]
3033 all_os[name][node_name].append((path, status, diagnose,
3034 variants, params, api_versions))
3037 def Exec(self, feedback_fn):
3038 """Compute the list of OSes.
3041 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3042 node_data = self.rpc.call_os_diagnose(valid_nodes)
3043 pol = self._DiagnoseByOS(node_data)
3046 for os_name, os_data in pol.items():
3049 (variants, params, api_versions) = null_state = (set(), set(), set())
3050 for idx, osl in enumerate(os_data.values()):
3051 valid = bool(valid and osl and osl[0][1])
3053 (variants, params, api_versions) = null_state
3055 node_variants, node_params, node_api = osl[0][3:6]
3056 if idx == 0: # first entry
3057 variants = set(node_variants)
3058 params = set(node_params)
3059 api_versions = set(node_api)
3060 else: # keep consistency
3061 variants.intersection_update(node_variants)
3062 params.intersection_update(node_params)
3063 api_versions.intersection_update(node_api)
3065 for field in self.op.output_fields:
3068 elif field == "valid":
3070 elif field == "node_status":
3071 # this is just a copy of the dict
3073 for node_name, nos_list in os_data.items():
3074 val[node_name] = nos_list
3075 elif field == "variants":
3076 val = list(variants)
3077 elif field == "parameters":
3079 elif field == "api_versions":
3080 val = list(api_versions)
3082 raise errors.ParameterError(field)
3089 class LURemoveNode(LogicalUnit):
3090 """Logical unit for removing a node.
3093 HPATH = "node-remove"
3094 HTYPE = constants.HTYPE_NODE
3099 def BuildHooksEnv(self):
3102 This doesn't run on the target node in the pre phase as a failed
3103 node would then be impossible to remove.
3107 "OP_TARGET": self.op.node_name,
3108 "NODE_NAME": self.op.node_name,
3110 all_nodes = self.cfg.GetNodeList()
3112 all_nodes.remove(self.op.node_name)
3114 logging.warning("Node %s which is about to be removed not found"
3115 " in the all nodes list", self.op.node_name)
3116 return env, all_nodes, all_nodes
3118 def CheckPrereq(self):
3119 """Check prerequisites.
3122 - the node exists in the configuration
3123 - it does not have primary or secondary instances
3124 - it's not the master
3126 Any errors are signaled by raising errors.OpPrereqError.
3129 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3130 node = self.cfg.GetNodeInfo(self.op.node_name)
3131 assert node is not None
3133 instance_list = self.cfg.GetInstanceList()
3135 masternode = self.cfg.GetMasterNode()
3136 if node.name == masternode:
3137 raise errors.OpPrereqError("Node is the master node,"
3138 " you need to failover first.",
3141 for instance_name in instance_list:
3142 instance = self.cfg.GetInstanceInfo(instance_name)
3143 if node.name in instance.all_nodes:
3144 raise errors.OpPrereqError("Instance %s is still running on the node,"
3145 " please remove first." % instance_name,
3147 self.op.node_name = node.name
3150 def Exec(self, feedback_fn):
3151 """Removes the node from the cluster.
3155 logging.info("Stopping the node daemon and removing configs from node %s",
3158 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3160 # Promote nodes to master candidate as needed
3161 _AdjustCandidatePool(self, exceptions=[node.name])
3162 self.context.RemoveNode(node.name)
3164 # Run post hooks on the node before it's removed
3165 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3167 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3169 # pylint: disable-msg=W0702
3170 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3172 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3173 msg = result.fail_msg
3175 self.LogWarning("Errors encountered on the remote node while leaving"
3176 " the cluster: %s", msg)
3178 # Remove node from our /etc/hosts
3179 if self.cfg.GetClusterInfo().modify_etc_hosts:
3180 # FIXME: this should be done via an rpc call to node daemon
3181 utils.RemoveHostFromEtcHosts(node.name)
3182 _RedistributeAncillaryFiles(self)
3185 class LUQueryNodes(NoHooksLU):
3186 """Logical unit for querying nodes.
3189 # pylint: disable-msg=W0142
3192 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3193 ("use_locking", False, _TBool),
3197 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3198 "master_candidate", "offline", "drained"]
3200 _FIELDS_DYNAMIC = utils.FieldSet(
3202 "mtotal", "mnode", "mfree",
3204 "ctotal", "cnodes", "csockets",
3207 _FIELDS_STATIC = utils.FieldSet(*[
3208 "pinst_cnt", "sinst_cnt",
3209 "pinst_list", "sinst_list",
3210 "pip", "sip", "tags",
3212 "role"] + _SIMPLE_FIELDS
3215 def CheckArguments(self):
3216 _CheckOutputFields(static=self._FIELDS_STATIC,
3217 dynamic=self._FIELDS_DYNAMIC,
3218 selected=self.op.output_fields)
3220 def ExpandNames(self):
3221 self.needed_locks = {}
3222 self.share_locks[locking.LEVEL_NODE] = 1
3225 self.wanted = _GetWantedNodes(self, self.op.names)
3227 self.wanted = locking.ALL_SET
3229 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3230 self.do_locking = self.do_node_query and self.op.use_locking
3232 # if we don't request only static fields, we need to lock the nodes
3233 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3235 def Exec(self, feedback_fn):
3236 """Computes the list of nodes and their attributes.
3239 all_info = self.cfg.GetAllNodesInfo()
3241 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3242 elif self.wanted != locking.ALL_SET:
3243 nodenames = self.wanted
3244 missing = set(nodenames).difference(all_info.keys())
3246 raise errors.OpExecError(
3247 "Some nodes were removed before retrieving their data: %s" % missing)
3249 nodenames = all_info.keys()
3251 nodenames = utils.NiceSort(nodenames)
3252 nodelist = [all_info[name] for name in nodenames]
3254 # begin data gathering
3256 if self.do_node_query:
3258 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3259 self.cfg.GetHypervisorType())
3260 for name in nodenames:
3261 nodeinfo = node_data[name]
3262 if not nodeinfo.fail_msg and nodeinfo.payload:
3263 nodeinfo = nodeinfo.payload
3264 fn = utils.TryConvert
3266 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3267 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3268 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3269 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3270 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3271 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3272 "bootid": nodeinfo.get('bootid', None),
3273 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3274 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3277 live_data[name] = {}
3279 live_data = dict.fromkeys(nodenames, {})
3281 node_to_primary = dict([(name, set()) for name in nodenames])
3282 node_to_secondary = dict([(name, set()) for name in nodenames])
3284 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3285 "sinst_cnt", "sinst_list"))
3286 if inst_fields & frozenset(self.op.output_fields):
3287 inst_data = self.cfg.GetAllInstancesInfo()
3289 for inst in inst_data.values():
3290 if inst.primary_node in node_to_primary:
3291 node_to_primary[inst.primary_node].add(inst.name)
3292 for secnode in inst.secondary_nodes:
3293 if secnode in node_to_secondary:
3294 node_to_secondary[secnode].add(inst.name)
3296 master_node = self.cfg.GetMasterNode()
3298 # end data gathering
3301 for node in nodelist:
3303 for field in self.op.output_fields:
3304 if field in self._SIMPLE_FIELDS:
3305 val = getattr(node, field)
3306 elif field == "pinst_list":
3307 val = list(node_to_primary[node.name])
3308 elif field == "sinst_list":
3309 val = list(node_to_secondary[node.name])
3310 elif field == "pinst_cnt":
3311 val = len(node_to_primary[node.name])
3312 elif field == "sinst_cnt":
3313 val = len(node_to_secondary[node.name])
3314 elif field == "pip":
3315 val = node.primary_ip
3316 elif field == "sip":
3317 val = node.secondary_ip
3318 elif field == "tags":
3319 val = list(node.GetTags())
3320 elif field == "master":
3321 val = node.name == master_node
3322 elif self._FIELDS_DYNAMIC.Matches(field):
3323 val = live_data[node.name].get(field, None)
3324 elif field == "role":
3325 if node.name == master_node:
3327 elif node.master_candidate:
3336 raise errors.ParameterError(field)
3337 node_output.append(val)
3338 output.append(node_output)
3343 class LUQueryNodeVolumes(NoHooksLU):
3344 """Logical unit for getting volumes on node(s).
3348 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3349 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3352 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3353 _FIELDS_STATIC = utils.FieldSet("node")
3355 def CheckArguments(self):
3356 _CheckOutputFields(static=self._FIELDS_STATIC,
3357 dynamic=self._FIELDS_DYNAMIC,
3358 selected=self.op.output_fields)
3360 def ExpandNames(self):
3361 self.needed_locks = {}
3362 self.share_locks[locking.LEVEL_NODE] = 1
3363 if not self.op.nodes:
3364 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3366 self.needed_locks[locking.LEVEL_NODE] = \
3367 _GetWantedNodes(self, self.op.nodes)
3369 def Exec(self, feedback_fn):
3370 """Computes the list of nodes and their attributes.
3373 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3374 volumes = self.rpc.call_node_volumes(nodenames)
3376 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3377 in self.cfg.GetInstanceList()]
3379 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3382 for node in nodenames:
3383 nresult = volumes[node]
3386 msg = nresult.fail_msg
3388 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3391 node_vols = nresult.payload[:]
3392 node_vols.sort(key=lambda vol: vol['dev'])
3394 for vol in node_vols:
3396 for field in self.op.output_fields:
3399 elif field == "phys":
3403 elif field == "name":
3405 elif field == "size":
3406 val = int(float(vol['size']))
3407 elif field == "instance":
3409 if node not in lv_by_node[inst]:
3411 if vol['name'] in lv_by_node[inst][node]:
3417 raise errors.ParameterError(field)
3418 node_output.append(str(val))
3420 output.append(node_output)
3425 class LUQueryNodeStorage(NoHooksLU):
3426 """Logical unit for getting information on storage units on node(s).
3429 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3431 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3432 ("storage_type", _NoDefault, _CheckStorageType),
3433 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3434 ("name", None, _TMaybeString),
3438 def CheckArguments(self):
3439 _CheckOutputFields(static=self._FIELDS_STATIC,
3440 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3441 selected=self.op.output_fields)
3443 def ExpandNames(self):
3444 self.needed_locks = {}
3445 self.share_locks[locking.LEVEL_NODE] = 1
3448 self.needed_locks[locking.LEVEL_NODE] = \
3449 _GetWantedNodes(self, self.op.nodes)
3451 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3453 def Exec(self, feedback_fn):
3454 """Computes the list of nodes and their attributes.
3457 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3459 # Always get name to sort by
3460 if constants.SF_NAME in self.op.output_fields:
3461 fields = self.op.output_fields[:]
3463 fields = [constants.SF_NAME] + self.op.output_fields
3465 # Never ask for node or type as it's only known to the LU
3466 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3467 while extra in fields:
3468 fields.remove(extra)
3470 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3471 name_idx = field_idx[constants.SF_NAME]
3473 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3474 data = self.rpc.call_storage_list(self.nodes,
3475 self.op.storage_type, st_args,
3476 self.op.name, fields)
3480 for node in utils.NiceSort(self.nodes):
3481 nresult = data[node]
3485 msg = nresult.fail_msg
3487 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3490 rows = dict([(row[name_idx], row) for row in nresult.payload])
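# rows maps each storage unit's name to its full result row, so the loop
# below can emit the output sorted by unit name via utils.NiceSort.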
3492 for name in utils.NiceSort(rows.keys()):
3497 for field in self.op.output_fields:
3498 if field == constants.SF_NODE:
3500 elif field == constants.SF_TYPE:
3501 val = self.op.storage_type
3502 elif field in field_idx:
3503 val = row[field_idx[field]]
3505 raise errors.ParameterError(field)
3514 class LUModifyNodeStorage(NoHooksLU):
3515 """Logical unit for modifying a storage volume on a node.
3520 ("storage_type", _NoDefault, _CheckStorageType),
3521 ("name", _NoDefault, _TNonEmptyString),
3522 ("changes", _NoDefault, _TDict),
3526 def CheckArguments(self):
3527 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3529 storage_type = self.op.storage_type
3532 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3534 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3535 " modified" % storage_type,
3538 diff = set(self.op.changes.keys()) - modifiable
3540 raise errors.OpPrereqError("The following fields can not be modified for"
3541 " storage units of type '%s': %r" %
3542 (storage_type, list(diff)),
3545 def ExpandNames(self):
3546 self.needed_locks = {
3547 locking.LEVEL_NODE: self.op.node_name,
3550 def Exec(self, feedback_fn):
3551 """Computes the list of nodes and their attributes.
3554 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3555 result = self.rpc.call_storage_modify(self.op.node_name,
3556 self.op.storage_type, st_args,
3557 self.op.name, self.op.changes)
3558 result.Raise("Failed to modify storage unit '%s' on %s" %
3559 (self.op.name, self.op.node_name))
3562 class LUAddNode(LogicalUnit):
3563 """Logical unit for adding node to the cluster.
3567 HTYPE = constants.HTYPE_NODE
3570 ("primary_ip", None, _NoType),
3571 ("secondary_ip", None, _TMaybeString),
3572 ("readd", False, _TBool),
3575 def CheckArguments(self):
3576 # validate/normalize the node name
3577 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3579 def BuildHooksEnv(self):
3582 This will run on all nodes before, and on all nodes + the new node after.
3586 "OP_TARGET": self.op.node_name,
3587 "NODE_NAME": self.op.node_name,
3588 "NODE_PIP": self.op.primary_ip,
3589 "NODE_SIP": self.op.secondary_ip,
3591 nodes_0 = self.cfg.GetNodeList()
3592 nodes_1 = nodes_0 + [self.op.node_name, ]
3593 return env, nodes_0, nodes_1
3595 def CheckPrereq(self):
3596 """Check prerequisites.
3599 - the new node is not already in the config
3601 - its parameters (single/dual homed) matches the cluster
3603 Any errors are signaled by raising errors.OpPrereqError.
3606 node_name = self.op.node_name
3609 dns_data = utils.GetHostInfo(node_name)
3611 node = dns_data.name
3612 primary_ip = self.op.primary_ip = dns_data.ip
3613 if self.op.secondary_ip is None:
3614 self.op.secondary_ip = primary_ip
3615 if not utils.IsValidIP4(self.op.secondary_ip):
3616 raise errors.OpPrereqError("Invalid secondary IP given",
3618 secondary_ip = self.op.secondary_ip
3620 node_list = cfg.GetNodeList()
3621 if not self.op.readd and node in node_list:
3622 raise errors.OpPrereqError("Node %s is already in the configuration" %
3623 node, errors.ECODE_EXISTS)
3624 elif self.op.readd and node not in node_list:
3625 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3628 self.changed_primary_ip = False
3630 for existing_node_name in node_list:
3631 existing_node = cfg.GetNodeInfo(existing_node_name)
3633 if self.op.readd and node == existing_node_name:
3634 if existing_node.secondary_ip != secondary_ip:
3635 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3636 " address configuration as before",
3638 if existing_node.primary_ip != primary_ip:
3639 self.changed_primary_ip = True
3643 if (existing_node.primary_ip == primary_ip or
3644 existing_node.secondary_ip == primary_ip or
3645 existing_node.primary_ip == secondary_ip or
3646 existing_node.secondary_ip == secondary_ip):
3647 raise errors.OpPrereqError("New node ip address(es) conflict with"
3648 " existing node %s" % existing_node.name,
3649 errors.ECODE_NOTUNIQUE)
3651 # check that the type of the node (single versus dual homed) is the
3652 # same as for the master
3653 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3654 master_singlehomed = myself.secondary_ip == myself.primary_ip
3655 newbie_singlehomed = secondary_ip == primary_ip
3656 if master_singlehomed != newbie_singlehomed:
3657 if master_singlehomed:
3658 raise errors.OpPrereqError("The master has no private ip but the"
3659 " new node has one",
3662 raise errors.OpPrereqError("The master has a private ip but the"
3663 " new node doesn't have one",
3666 # checks reachability
3667 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3668 raise errors.OpPrereqError("Node not reachable by ping",
3669 errors.ECODE_ENVIRON)
3671 if not newbie_singlehomed:
3672 # check reachability from my secondary ip to newbie's secondary ip
3673 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3674 source=myself.secondary_ip):
3675 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3676 " based ping to noded port",
3677 errors.ECODE_ENVIRON)
3684 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3687 self.new_node = self.cfg.GetNodeInfo(node)
3688 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3690 self.new_node = objects.Node(name=node,
3691 primary_ip=primary_ip,
3692 secondary_ip=secondary_ip,
3693 master_candidate=self.master_candidate,
3694 offline=False, drained=False)
3696 def Exec(self, feedback_fn):
3697 """Adds the new node to the cluster.
3700 new_node = self.new_node
3701 node = new_node.name
3703 # for re-adds, reset the offline/drained/master-candidate flags;
3704 # we need to reset here, otherwise offline would prevent RPC calls
3705 # later in the procedure; this also means that if the re-add
3706 # fails, we are left with a non-offlined, broken node
3708 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3709 self.LogInfo("Readding a node, the offline/drained flags were reset")
3710 # if we demote the node, we do cleanup later in the procedure
3711 new_node.master_candidate = self.master_candidate
3712 if self.changed_primary_ip:
3713 new_node.primary_ip = self.op.primary_ip
3715 # notify the user about any possible mc promotion
3716 if new_node.master_candidate:
3717 self.LogInfo("Node will be a master candidate")
3719 # check connectivity
3720 result = self.rpc.call_version([node])[node]
3721 result.Raise("Can't get version information from node %s" % node)
3722 if constants.PROTOCOL_VERSION == result.payload:
3723 logging.info("Communication to node %s fine, sw version %s match",
3724 node, result.payload)
3726 raise errors.OpExecError("Version mismatch master version %s,"
3727 " node version %s" %
3728 (constants.PROTOCOL_VERSION, result.payload))
3731 if self.cfg.GetClusterInfo().modify_ssh_setup:
3732 logging.info("Copy ssh key to node %s", node)
3733 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3735 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3736 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3740 keyarray.append(utils.ReadFile(i))
3742 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3743 keyarray[2], keyarray[3], keyarray[4],
3745 result.Raise("Cannot transfer ssh keys to the new node")
3747 # Add node to our /etc/hosts, and add key to known_hosts
3748 if self.cfg.GetClusterInfo().modify_etc_hosts:
3749 # FIXME: this should be done via an rpc call to node daemon
3750 utils.AddHostToEtcHosts(new_node.name)
3752 if new_node.secondary_ip != new_node.primary_ip:
3753 result = self.rpc.call_node_has_ip_address(new_node.name,
3754 new_node.secondary_ip)
3755 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3756 prereq=True, ecode=errors.ECODE_ENVIRON)
3757 if not result.payload:
3758 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3759 " you gave (%s). Please fix and re-run this"
3760 " command." % new_node.secondary_ip)
3762 node_verify_list = [self.cfg.GetMasterNode()]
3763 node_verify_param = {
3764 constants.NV_NODELIST: [node],
3765 # TODO: do a node-net-test as well?
3768 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3769 self.cfg.GetClusterName())
3770 for verifier in node_verify_list:
3771 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3772 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3774 for failed in nl_payload:
3775 feedback_fn("ssh/hostname verification failed"
3776 " (checking from %s): %s" %
3777 (verifier, nl_payload[failed]))
3778 raise errors.OpExecError("ssh/hostname verification failed.")
3781 _RedistributeAncillaryFiles(self)
3782 self.context.ReaddNode(new_node)
3783 # make sure we redistribute the config
3784 self.cfg.Update(new_node, feedback_fn)
3785 # and make sure the new node will not have old files around
3786 if not new_node.master_candidate:
3787 result = self.rpc.call_node_demote_from_mc(new_node.name)
3788 msg = result.fail_msg
3790 self.LogWarning("Node failed to demote itself from master"
3791 " candidate status: %s" % msg)
3793 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3794 self.context.AddNode(new_node, self.proc.GetECId())
3797 class LUSetNodeParams(LogicalUnit):
3798 """Modifies the parameters of a node.
3801 HPATH = "node-modify"
3802 HTYPE = constants.HTYPE_NODE
3805 ("master_candidate", None, _TMaybeBool),
3806 ("offline", None, _TMaybeBool),
3807 ("drained", None, _TMaybeBool),
3808 ("auto_promote", False, _TBool),
3813 def CheckArguments(self):
3814 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3815 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3816 if all_mods.count(None) == 3:
3817 raise errors.OpPrereqError("Please pass at least one modification",
3819 if all_mods.count(True) > 1:
3820 raise errors.OpPrereqError("Can't set the node into more than one"
3821 " state at the same time",
3824 # Boolean value that tells us whether we're offlining or draining the node
3825 self.offline_or_drain = (self.op.offline == True or
3826 self.op.drained == True)
3827 self.deoffline_or_drain = (self.op.offline == False or
3828 self.op.drained == False)
3829 self.might_demote = (self.op.master_candidate == False or
3830 self.offline_or_drain)
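# Worked example: passing offline=True alone makes offline_or_drain and
# might_demote True while deoffline_or_drain stays False; offline=False alone
# only sets deoffline_or_drain; master_candidate=False alone only sets
# might_demote.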
3832 self.lock_all = self.op.auto_promote and self.might_demote
3835 def ExpandNames(self):
3837 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3839 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3841 def BuildHooksEnv(self):
3844 This runs on the master node.
3848 "OP_TARGET": self.op.node_name,
3849 "MASTER_CANDIDATE": str(self.op.master_candidate),
3850 "OFFLINE": str(self.op.offline),
3851 "DRAINED": str(self.op.drained),
3853 nl = [self.cfg.GetMasterNode(),
3857 def CheckPrereq(self):
3858 """Check prerequisites.
3860 This checks that the requested node flag changes are valid.
3863 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3865 if (self.op.master_candidate is not None or
3866 self.op.drained is not None or
3867 self.op.offline is not None):
3868 # we can't change the master's node flags
3869 if self.op.node_name == self.cfg.GetMasterNode():
3870 raise errors.OpPrereqError("The master role can be changed"
3871 " only via masterfailover",
3875 if node.master_candidate and self.might_demote and not self.lock_all:
3876 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3877 # check if after removing the current node, we're missing master
3879 (mc_remaining, mc_should, _) = \
3880 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3881 if mc_remaining < mc_should:
3882 raise errors.OpPrereqError("Not enough master candidates, please"
3883 " pass auto_promote to allow promotion",
3886 if (self.op.master_candidate == True and
3887 ((node.offline and not self.op.offline == False) or
3888 (node.drained and not self.op.drained == False))):
3889 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3890 " to master_candidate" % node.name,
3893 # If we're being deofflined/drained, we'll MC ourself if needed
3894 if (self.deoffline_or_drain and not self.offline_or_drain and not
3895 self.op.master_candidate == True and not node.master_candidate):
3896 self.op.master_candidate = _DecideSelfPromotion(self)
3897 if self.op.master_candidate:
3898 self.LogInfo("Autopromoting node to master candidate")
3902 def Exec(self, feedback_fn):
3911 if self.op.offline is not None:
3912 node.offline = self.op.offline
3913 result.append(("offline", str(self.op.offline)))
3914 if self.op.offline == True:
3915 if node.master_candidate:
3916 node.master_candidate = False
3918 result.append(("master_candidate", "auto-demotion due to offline"))
3920 node.drained = False
3921 result.append(("drained", "clear drained status due to offline"))
3923 if self.op.master_candidate is not None:
3924 node.master_candidate = self.op.master_candidate
3926 result.append(("master_candidate", str(self.op.master_candidate)))
3927 if self.op.master_candidate == False:
3928 rrc = self.rpc.call_node_demote_from_mc(node.name)
3931 self.LogWarning("Node failed to demote itself: %s" % msg)
3933 if self.op.drained is not None:
3934 node.drained = self.op.drained
3935 result.append(("drained", str(self.op.drained)))
3936 if self.op.drained == True:
3937 if node.master_candidate:
3938 node.master_candidate = False
3940 result.append(("master_candidate", "auto-demotion due to drain"))
3941 rrc = self.rpc.call_node_demote_from_mc(node.name)
3944 self.LogWarning("Node failed to demote itself: %s" % msg)
3946 node.offline = False
3947 result.append(("offline", "clear offline status due to drain"))
3949 # we locked all nodes, we adjust the CP before updating this node
3950 if self.lock_all:
3951 _AdjustCandidatePool(self, [node.name])
3953 # this will trigger configuration file update, if needed
3954 self.cfg.Update(node, feedback_fn)
3956 # this will trigger job queue propagation or cleanup
3958 self.context.ReaddNode(node)
3963 class LUPowercycleNode(NoHooksLU):
3964 """Powercycles a node.
3973 def CheckArguments(self):
3974 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3975 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3976 raise errors.OpPrereqError("The node is the master and the force"
3977 " parameter was not set",
3980 def ExpandNames(self):
3981 """Locking for PowercycleNode.
3983 This is a last-resort option and shouldn't block on other
3984 jobs. Therefore, we grab no locks.
3987 self.needed_locks = {}
3989 def Exec(self, feedback_fn):
3993 result = self.rpc.call_node_powercycle(self.op.node_name,
3994 self.cfg.GetHypervisorType())
3995 result.Raise("Failed to schedule the reboot")
3996 return result.payload
3999 class LUQueryClusterInfo(NoHooksLU):
4000 """Query cluster configuration.
4005 def ExpandNames(self):
4006 self.needed_locks = {}
4008 def Exec(self, feedback_fn):
4009 """Return cluster config.
4012 cluster = self.cfg.GetClusterInfo()
4015 # Filter just for enabled hypervisors
4016 for os_name, hv_dict in cluster.os_hvp.items():
4017 os_hvp[os_name] = {}
4018 for hv_name, hv_params in hv_dict.items():
4019 if hv_name in cluster.enabled_hypervisors:
4020 os_hvp[os_name][hv_name] = hv_params
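# Illustrative result (a sketch with made-up names): if only "xen-pvm" is
# enabled, an os_hvp entry such as
# {"debian-image": {"xen-pvm": {...}, "kvm": {...}}}
# is reduced to {"debian-image": {"xen-pvm": {...}}}.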
4023 "software_version": constants.RELEASE_VERSION,
4024 "protocol_version": constants.PROTOCOL_VERSION,
4025 "config_version": constants.CONFIG_VERSION,
4026 "os_api_version": max(constants.OS_API_VERSIONS),
4027 "export_version": constants.EXPORT_VERSION,
4028 "architecture": (platform.architecture()[0], platform.machine()),
4029 "name": cluster.cluster_name,
4030 "master": cluster.master_node,
4031 "default_hypervisor": cluster.enabled_hypervisors[0],
4032 "enabled_hypervisors": cluster.enabled_hypervisors,
4033 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4034 for hypervisor_name in cluster.enabled_hypervisors]),
4036 "beparams": cluster.beparams,
4037 "osparams": cluster.osparams,
4038 "nicparams": cluster.nicparams,
4039 "candidate_pool_size": cluster.candidate_pool_size,
4040 "master_netdev": cluster.master_netdev,
4041 "volume_group_name": cluster.volume_group_name,
4042 "file_storage_dir": cluster.file_storage_dir,
4043 "maintain_node_health": cluster.maintain_node_health,
4044 "ctime": cluster.ctime,
4045 "mtime": cluster.mtime,
4046 "uuid": cluster.uuid,
4047 "tags": list(cluster.GetTags()),
4048 "uid_pool": cluster.uid_pool,
4054 class LUQueryConfigValues(NoHooksLU):
4055 """Return configuration values.
4058 _OP_PARAMS = [_POutputFields]
4060 _FIELDS_DYNAMIC = utils.FieldSet()
4061 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4064 def CheckArguments(self):
4065 _CheckOutputFields(static=self._FIELDS_STATIC,
4066 dynamic=self._FIELDS_DYNAMIC,
4067 selected=self.op.output_fields)
4069 def ExpandNames(self):
4070 self.needed_locks = {}
4072 def Exec(self, feedback_fn):
4073 """Collect and return the requested configuration values.
4077 for field in self.op.output_fields:
4078 if field == "cluster_name":
4079 entry = self.cfg.GetClusterName()
4080 elif field == "master_node":
4081 entry = self.cfg.GetMasterNode()
4082 elif field == "drain_flag":
4083 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4084 elif field == "watcher_pause":
4085 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4087 raise errors.ParameterError(field)
4088 values.append(entry)
4092 class LUActivateInstanceDisks(NoHooksLU):
4093 """Bring up an instance's disks.
4098 ("ignore_size", False, _TBool),
4102 def ExpandNames(self):
4103 self._ExpandAndLockInstance()
4104 self.needed_locks[locking.LEVEL_NODE] = []
4105 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4107 def DeclareLocks(self, level):
4108 if level == locking.LEVEL_NODE:
4109 self._LockInstancesNodes()
4111 def CheckPrereq(self):
4112 """Check prerequisites.
4114 This checks that the instance is in the cluster.
4117 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4118 assert self.instance is not None, \
4119 "Cannot retrieve locked instance %s" % self.op.instance_name
4120 _CheckNodeOnline(self, self.instance.primary_node)
4122 def Exec(self, feedback_fn):
4123 """Activate the disks.
4126 disks_ok, disks_info = \
4127 _AssembleInstanceDisks(self, self.instance,
4128 ignore_size=self.op.ignore_size)
4130 raise errors.OpExecError("Cannot activate block devices")
4135 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4137 """Prepare the block devices for an instance.
4139 This sets up the block devices on all nodes.
4141 @type lu: L{LogicalUnit}
4142 @param lu: the logical unit on whose behalf we execute
4143 @type instance: L{objects.Instance}
4144 @param instance: the instance for whose disks we assemble
4145 @type disks: list of L{objects.Disk} or None
4146 @param disks: which disks to assemble (or all, if None)
4147 @type ignore_secondaries: boolean
4148 @param ignore_secondaries: if true, errors on secondary nodes
4149 won't result in an error return from the function
4150 @type ignore_size: boolean
4151 @param ignore_size: if true, the current known size of the disk
4152 will not be used during the disk activation, useful for cases
4153 when the size is wrong
4154 @return: a tuple of (disks_ok, device_info); device_info is a list of
4155 (host, instance_visible_name, node_visible_name) tuples
4156 with the mapping from node devices to instance devices
4161 iname = instance.name
4162 disks = _ExpandCheckDisks(instance, disks)
4164 # With the two-pass mechanism we try to reduce the window of
4165 # opportunity for the race condition of switching DRBD to primary
4166 # before handshaking occurred, but we do not eliminate it
4168 # The proper fix would be to wait (with some limits) until the
4169 # connection has been made and drbd transitions from WFConnection
4170 # into any other network-connected state (Connected, SyncTarget,
4171 # SyncSource, etc.)
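# For a DRBD disk of a two-node instance this means: pass 1 assembles the
# device with is_primary=False on both the primary and the secondary node,
# and only pass 2 re-assembles it with is_primary=True on the primary node,
# giving the two peers a chance to connect in between.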
4173 # 1st pass, assemble on all nodes in secondary mode
4174 for inst_disk in disks:
4175 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4177 node_disk = node_disk.Copy()
4178 node_disk.UnsetSize()
4179 lu.cfg.SetDiskID(node_disk, node)
4180 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4181 msg = result.fail_msg
4183 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4184 " (is_primary=False, pass=1): %s",
4185 inst_disk.iv_name, node, msg)
4186 if not ignore_secondaries:
4189 # FIXME: race condition on drbd migration to primary
4191 # 2nd pass, do only the primary node
4192 for inst_disk in disks:
4195 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4196 if node != instance.primary_node:
4199 node_disk = node_disk.Copy()
4200 node_disk.UnsetSize()
4201 lu.cfg.SetDiskID(node_disk, node)
4202 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4203 msg = result.fail_msg
4205 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4206 " (is_primary=True, pass=2): %s",
4207 inst_disk.iv_name, node, msg)
4210 dev_path = result.payload
4212 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4214 # leave the disks configured for the primary node
4215 # this is a workaround that would be fixed better by
4216 # improving the logical/physical id handling
4218 lu.cfg.SetDiskID(disk, instance.primary_node)
4220 return disks_ok, device_info
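# Illustrative usage (a sketch mirroring LUActivateInstanceDisks above):
#
#   disks_ok, device_info = _AssembleInstanceDisks(self, self.instance,
#                                                  ignore_size=self.op.ignore_size)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   # each device_info entry is a (node, iv_name, device_path) tuple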
4223 def _StartInstanceDisks(lu, instance, force):
4224 """Start the disks of an instance.
4227 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4228 ignore_secondaries=force)
4230 _ShutdownInstanceDisks(lu, instance)
4231 if force is not None and not force:
4232 lu.proc.LogWarning("", hint="If the message above refers to a"
4234 " you can retry the operation using '--force'.")
4235 raise errors.OpExecError("Disk consistency error")
4238 class LUDeactivateInstanceDisks(NoHooksLU):
4239 """Shutdown an instance's disks.
4247 def ExpandNames(self):
4248 self._ExpandAndLockInstance()
4249 self.needed_locks[locking.LEVEL_NODE] = []
4250 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4252 def DeclareLocks(self, level):
4253 if level == locking.LEVEL_NODE:
4254 self._LockInstancesNodes()
4256 def CheckPrereq(self):
4257 """Check prerequisites.
4259 This checks that the instance is in the cluster.
4262 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4263 assert self.instance is not None, \
4264 "Cannot retrieve locked instance %s" % self.op.instance_name
4266 def Exec(self, feedback_fn):
4267 """Deactivate the disks
4270 instance = self.instance
4271 _SafeShutdownInstanceDisks(self, instance)
4274 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4275 """Shutdown block devices of an instance.
4277 This function checks that the instance is not running, before calling
4278 _ShutdownInstanceDisks.
4281 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4282 _ShutdownInstanceDisks(lu, instance, disks=disks)
4285 def _ExpandCheckDisks(instance, disks):
4286 """Return the instance disks selected by the disks list
4288 @type disks: list of L{objects.Disk} or None
4289 @param disks: selected disks
4290 @rtype: list of L{objects.Disk}
4291 @return: selected instance disks to act on
4295 return instance.disks
4297 if not set(disks).issubset(instance.disks):
4298 raise errors.ProgrammerError("Can only act on disks belonging to the"
4303 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4304 """Shutdown block devices of an instance.
4306 This does the shutdown on all nodes of the instance.
4308 If the ignore_primary is false, errors on the primary node are
4309 treated as fatal and the shutdown is reported as failed.
4313 disks = _ExpandCheckDisks(instance, disks)
4316 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4317 lu.cfg.SetDiskID(top_disk, node)
4318 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4319 msg = result.fail_msg
4321 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4322 disk.iv_name, node, msg)
4323 if not ignore_primary or node != instance.primary_node:
4328 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4329 """Checks if a node has enough free memory.
4331 This function checks if a given node has the needed amount of free
4332 memory. In case the node has less memory or we cannot get the
4333 information from the node, this function raises an OpPrereqError
4334 exception.
4336 @type lu: C{LogicalUnit}
4337 @param lu: a logical unit from which we get configuration data
4339 @param node: the node to check
4340 @type reason: C{str}
4341 @param reason: string to use in the error message
4342 @type requested: C{int}
4343 @param requested: the amount of memory in MiB to check for
4344 @type hypervisor_name: C{str}
4345 @param hypervisor_name: the hypervisor to ask for memory stats
4346 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4347 we cannot check the node
4350 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4351 nodeinfo[node].Raise("Can't get data from node %s" % node,
4352 prereq=True, ecode=errors.ECODE_ENVIRON)
4353 free_mem = nodeinfo[node].payload.get('memory_free', None)
4354 if not isinstance(free_mem, int):
4355 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4356 " was '%s'" % (node, free_mem),
4357 errors.ECODE_ENVIRON)
4358 if requested > free_mem:
4359 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4360 " needed %s MiB, available %s MiB" %
4361 (node, reason, requested, free_mem),
4362 errors.ECODE_NORES)
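# Illustrative call (a sketch based on how LUStartupInstance uses this
# helper below): verify that the primary node can hold the instance's
# memory before starting it:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)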
4365 def _CheckNodesFreeDisk(lu, nodenames, requested):
4366 """Checks if nodes have enough free disk space in the default VG.
4368 This function checks if all given nodes have the needed amount of
4369 free disk. In case any node has less disk or we cannot get the
4370 information from the node, this function raises an OpPrereqError
4371 exception.
4373 @type lu: C{LogicalUnit}
4374 @param lu: a logical unit from which we get configuration data
4375 @type nodenames: C{list}
4376 @param nodenames: the list of node names to check
4377 @type requested: C{int}
4378 @param requested: the amount of disk in MiB to check for
4379 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4380 we cannot check the node
4383 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4384 lu.cfg.GetHypervisorType())
4385 for node in nodenames:
4386 info = nodeinfo[node]
4387 info.Raise("Cannot get current information from node %s" % node,
4388 prereq=True, ecode=errors.ECODE_ENVIRON)
4389 vg_free = info.payload.get("vg_free", None)
4390 if not isinstance(vg_free, int):
4391 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4392 " result was '%s'" % (node, vg_free),
4393 errors.ECODE_ENVIRON)
4394 if requested > vg_free:
4395 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4396 " required %d MiB, available %d MiB" %
4397 (node, requested, vg_free),
4398 errors.ECODE_NORES)
4401 class LUStartupInstance(LogicalUnit):
4402 """Starts an instance.
4405 HPATH = "instance-start"
4406 HTYPE = constants.HTYPE_INSTANCE
4410 ("hvparams", _EmptyDict, _TDict),
4411 ("beparams", _EmptyDict, _TDict),
4415 def CheckArguments(self):
4417 if self.op.beparams:
4418 # fill the beparams dict
4419 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4421 def ExpandNames(self):
4422 self._ExpandAndLockInstance()
4424 def BuildHooksEnv(self):
4427 This runs on master, primary and secondary nodes of the instance.
4431 "FORCE": self.op.force,
4433 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4434 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4437 def CheckPrereq(self):
4438 """Check prerequisites.
4440 This checks that the instance is in the cluster.
4443 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4444 assert self.instance is not None, \
4445 "Cannot retrieve locked instance %s" % self.op.instance_name
4448 if self.op.hvparams:
4449 # check hypervisor parameter syntax (locally)
4450 cluster = self.cfg.GetClusterInfo()
4451 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4452 filled_hvp = cluster.FillHV(instance)
4453 filled_hvp.update(self.op.hvparams)
4454 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4455 hv_type.CheckParameterSyntax(filled_hvp)
4456 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
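# Illustrative merge (a sketch; the parameter name is an example only): if
# the cluster-level hvparams contain {"kernel_path": "/boot/vmlinuz"} and
# the opcode passes hvparams={"kernel_path": "/boot/vmlinuz-test"}, the
# opcode value wins in filled_hvp, and that merged dict is what was
# syntax-checked above.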
4458 _CheckNodeOnline(self, instance.primary_node)
4460 bep = self.cfg.GetClusterInfo().FillBE(instance)
4461 # check bridges existence
4462 _CheckInstanceBridgesExist(self, instance)
4464 remote_info = self.rpc.call_instance_info(instance.primary_node,
4466 instance.hypervisor)
4467 remote_info.Raise("Error checking node %s" % instance.primary_node,
4468 prereq=True, ecode=errors.ECODE_ENVIRON)
4469 if not remote_info.payload: # not running already
4470 _CheckNodeFreeMemory(self, instance.primary_node,
4471 "starting instance %s" % instance.name,
4472 bep[constants.BE_MEMORY], instance.hypervisor)
4474 def Exec(self, feedback_fn):
4475 """Start the instance.
4478 instance = self.instance
4479 force = self.op.force
4481 self.cfg.MarkInstanceUp(instance.name)
4483 node_current = instance.primary_node
4485 _StartInstanceDisks(self, instance, force)
4487 result = self.rpc.call_instance_start(node_current, instance,
4488 self.op.hvparams, self.op.beparams)
4489 msg = result.fail_msg
4491 _ShutdownInstanceDisks(self, instance)
4492 raise errors.OpExecError("Could not start instance: %s" % msg)
4495 class LURebootInstance(LogicalUnit):
4496 """Reboot an instance.
4499 HPATH = "instance-reboot"
4500 HTYPE = constants.HTYPE_INSTANCE
4503 ("ignore_secondaries", False, _TBool),
4504 ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4509 def ExpandNames(self):
4510 self._ExpandAndLockInstance()
4512 def BuildHooksEnv(self):
4515 This runs on master, primary and secondary nodes of the instance.
4519 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4520 "REBOOT_TYPE": self.op.reboot_type,
4521 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4523 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4524 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4527 def CheckPrereq(self):
4528 """Check prerequisites.
4530 This checks that the instance is in the cluster.
4533 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4534 assert self.instance is not None, \
4535 "Cannot retrieve locked instance %s" % self.op.instance_name
4537 _CheckNodeOnline(self, instance.primary_node)
4539 # check bridges existence
4540 _CheckInstanceBridgesExist(self, instance)
4542 def Exec(self, feedback_fn):
4543 """Reboot the instance.
4546 instance = self.instance
4547 ignore_secondaries = self.op.ignore_secondaries
4548 reboot_type = self.op.reboot_type
4550 node_current = instance.primary_node
4552 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4553 constants.INSTANCE_REBOOT_HARD]:
4554 for disk in instance.disks:
4555 self.cfg.SetDiskID(disk, node_current)
4556 result = self.rpc.call_instance_reboot(node_current, instance,
4558 self.op.shutdown_timeout)
4559 result.Raise("Could not reboot instance")
4561 result = self.rpc.call_instance_shutdown(node_current, instance,
4562 self.op.shutdown_timeout)
4563 result.Raise("Could not shutdown instance for full reboot")
4564 _ShutdownInstanceDisks(self, instance)
4565 _StartInstanceDisks(self, instance, ignore_secondaries)
4566 result = self.rpc.call_instance_start(node_current, instance, None, None)
4567 msg = result.fail_msg
4569 _ShutdownInstanceDisks(self, instance)
4570 raise errors.OpExecError("Could not start instance for"
4571 " full reboot: %s" % msg)
4573 self.cfg.MarkInstanceUp(instance.name)
4576 class LUShutdownInstance(LogicalUnit):
4577 """Shutdown an instance.
4580 HPATH = "instance-stop"
4581 HTYPE = constants.HTYPE_INSTANCE
4584 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4588 def ExpandNames(self):
4589 self._ExpandAndLockInstance()
4591 def BuildHooksEnv(self):
4594 This runs on master, primary and secondary nodes of the instance.
4597 env = _BuildInstanceHookEnvByObject(self, self.instance)
4598 env["TIMEOUT"] = self.op.timeout
4599 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4602 def CheckPrereq(self):
4603 """Check prerequisites.
4605 This checks that the instance is in the cluster.
4608 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4609 assert self.instance is not None, \
4610 "Cannot retrieve locked instance %s" % self.op.instance_name
4611 _CheckNodeOnline(self, self.instance.primary_node)
4613 def Exec(self, feedback_fn):
4614 """Shutdown the instance.
4617 instance = self.instance
4618 node_current = instance.primary_node
4619 timeout = self.op.timeout
4620 self.cfg.MarkInstanceDown(instance.name)
4621 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4622 msg = result.fail_msg
4624 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4626 _ShutdownInstanceDisks(self, instance)
4629 class LUReinstallInstance(LogicalUnit):
4630 """Reinstall an instance.
4633 HPATH = "instance-reinstall"
4634 HTYPE = constants.HTYPE_INSTANCE
4637 ("os_type", None, _TMaybeString),
4638 ("force_variant", False, _TBool),
4642 def ExpandNames(self):
4643 self._ExpandAndLockInstance()
4645 def BuildHooksEnv(self):
4648 This runs on master, primary and secondary nodes of the instance.
4651 env = _BuildInstanceHookEnvByObject(self, self.instance)
4652 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4655 def CheckPrereq(self):
4656 """Check prerequisites.
4658 This checks that the instance is in the cluster and is not running.
4661 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4662 assert instance is not None, \
4663 "Cannot retrieve locked instance %s" % self.op.instance_name
4664 _CheckNodeOnline(self, instance.primary_node)
4666 if instance.disk_template == constants.DT_DISKLESS:
4667 raise errors.OpPrereqError("Instance '%s' has no disks" %
4668 self.op.instance_name,
4670 _CheckInstanceDown(self, instance, "cannot reinstall")
4672 if self.op.os_type is not None:
4674 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4675 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4677 self.instance = instance
4679 def Exec(self, feedback_fn):
4680 """Reinstall the instance.
4683 inst = self.instance
4685 if self.op.os_type is not None:
4686 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4687 inst.os = self.op.os_type
4688 self.cfg.Update(inst, feedback_fn)
4690 _StartInstanceDisks(self, inst, None)
4692 feedback_fn("Running the instance OS create scripts...")
4693 # FIXME: pass debug option from opcode to backend
4694 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4695 self.op.debug_level)
4696 result.Raise("Could not install OS for instance %s on node %s" %
4697 (inst.name, inst.primary_node))
4699 _ShutdownInstanceDisks(self, inst)
4702 class LURecreateInstanceDisks(LogicalUnit):
4703 """Recreate an instance's missing disks.
4706 HPATH = "instance-recreate-disks"
4707 HTYPE = constants.HTYPE_INSTANCE
4710 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4714 def ExpandNames(self):
4715 self._ExpandAndLockInstance()
4717 def BuildHooksEnv(self):
4720 This runs on master, primary and secondary nodes of the instance.
4723 env = _BuildInstanceHookEnvByObject(self, self.instance)
4724 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4727 def CheckPrereq(self):
4728 """Check prerequisites.
4730 This checks that the instance is in the cluster and is not running.
4733 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4734 assert instance is not None, \
4735 "Cannot retrieve locked instance %s" % self.op.instance_name
4736 _CheckNodeOnline(self, instance.primary_node)
4738 if instance.disk_template == constants.DT_DISKLESS:
4739 raise errors.OpPrereqError("Instance '%s' has no disks" %
4740 self.op.instance_name, errors.ECODE_INVAL)
4741 _CheckInstanceDown(self, instance, "cannot recreate disks")
4743 if not self.op.disks:
4744 self.op.disks = range(len(instance.disks))
4746 for idx in self.op.disks:
4747 if idx >= len(instance.disks):
4748 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4751 self.instance = instance
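# Illustrative opcode usage (a sketch): disks=[1] would recreate only the
# instance's second disk, while the default empty list (normalized to
# range(len(instance.disks)) above) recreates all of them.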
4753 def Exec(self, feedback_fn):
4754 """Recreate the disks.
4758 for idx, _ in enumerate(self.instance.disks):
4759 if idx not in self.op.disks: # disk idx has not been passed in
4763 _CreateDisks(self, self.instance, to_skip=to_skip)
4766 class LURenameInstance(LogicalUnit):
4767 """Rename an instance.
4770 HPATH = "instance-rename"
4771 HTYPE = constants.HTYPE_INSTANCE
4774 ("new_name", _NoDefault, _TNonEmptyString),
4775 ("ignore_ip", False, _TBool),
4776 ("check_name", True, _TBool),
4779 def BuildHooksEnv(self):
4782 This runs on master, primary and secondary nodes of the instance.
4785 env = _BuildInstanceHookEnvByObject(self, self.instance)
4786 env["INSTANCE_NEW_NAME"] = self.op.new_name
4787 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4790 def CheckPrereq(self):
4791 """Check prerequisites.
4793 This checks that the instance is in the cluster and is not running.
4796 self.op.instance_name = _ExpandInstanceName(self.cfg,
4797 self.op.instance_name)
4798 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4799 assert instance is not None
4800 _CheckNodeOnline(self, instance.primary_node)
4801 _CheckInstanceDown(self, instance, "cannot rename")
4802 self.instance = instance
4804 # new name verification
4805 if self.op.check_name:
4806 name_info = utils.GetHostInfo(self.op.new_name)
4807 self.op.new_name = name_info.name
4809 new_name = self.op.new_name
4811 instance_list = self.cfg.GetInstanceList()
4812 if new_name in instance_list:
4813 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4814 new_name, errors.ECODE_EXISTS)
4816 if not self.op.ignore_ip:
4817 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4818 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4819 (name_info.ip, new_name),
4820 errors.ECODE_NOTUNIQUE)
4822 def Exec(self, feedback_fn):
4823 """Rename the instance.
4826 inst = self.instance
4827 old_name = inst.name
4829 if inst.disk_template == constants.DT_FILE:
4830 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4832 self.cfg.RenameInstance(inst.name, self.op.new_name)
4833 # Change the instance lock. This is definitely safe while we hold the BGL
4834 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4835 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4837 # re-read the instance from the configuration after rename
4838 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4840 if inst.disk_template == constants.DT_FILE:
4841 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4842 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4843 old_file_storage_dir,
4844 new_file_storage_dir)
4845 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4846 " (but the instance has been renamed in Ganeti)" %
4847 (inst.primary_node, old_file_storage_dir,
4848 new_file_storage_dir))
4850 _StartInstanceDisks(self, inst, None)
4852 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4853 old_name, self.op.debug_level)
4854 msg = result.fail_msg
4856 msg = ("Could not run OS rename script for instance %s on node %s"
4857 " (but the instance has been renamed in Ganeti): %s" %
4858 (inst.name, inst.primary_node, msg))
4859 self.proc.LogWarning(msg)
4861 _ShutdownInstanceDisks(self, inst)
4864 class LURemoveInstance(LogicalUnit):
4865 """Remove an instance.
4868 HPATH = "instance-remove"
4869 HTYPE = constants.HTYPE_INSTANCE
4872 ("ignore_failures", False, _TBool),
4877 def ExpandNames(self):
4878 self._ExpandAndLockInstance()
4879 self.needed_locks[locking.LEVEL_NODE] = []
4880 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4882 def DeclareLocks(self, level):
4883 if level == locking.LEVEL_NODE:
4884 self._LockInstancesNodes()
4886 def BuildHooksEnv(self):
4889 This runs on master, primary and secondary nodes of the instance.
4892 env = _BuildInstanceHookEnvByObject(self, self.instance)
4893 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4894 nl = [self.cfg.GetMasterNode()]
4895 nl_post = list(self.instance.all_nodes) + nl
4896 return env, nl, nl_post
4898 def CheckPrereq(self):
4899 """Check prerequisites.
4901 This checks that the instance is in the cluster.
4904 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4905 assert self.instance is not None, \
4906 "Cannot retrieve locked instance %s" % self.op.instance_name
4908 def Exec(self, feedback_fn):
4909 """Remove the instance.
4912 instance = self.instance
4913 logging.info("Shutting down instance %s on node %s",
4914 instance.name, instance.primary_node)
4916 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4917 self.op.shutdown_timeout)
4918 msg = result.fail_msg
4920 if self.op.ignore_failures:
4921 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4923 raise errors.OpExecError("Could not shutdown instance %s on"
4925 (instance.name, instance.primary_node, msg))
4927 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4930 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4931 """Utility function to remove an instance.
4934 logging.info("Removing block devices for instance %s", instance.name)
4936 if not _RemoveDisks(lu, instance):
4937 if not ignore_failures:
4938 raise errors.OpExecError("Can't remove instance's disks")
4939 feedback_fn("Warning: can't remove instance's disks")
4941 logging.info("Removing instance %s out of cluster config", instance.name)
4943 lu.cfg.RemoveInstance(instance.name)
4945 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4946 "Instance lock removal conflict"
4948 # Remove lock for the instance
4949 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4952 class LUQueryInstances(NoHooksLU):
4953 """Logical unit for querying instances.
4956 # pylint: disable-msg=W0142
4958 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
4959 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
4960 ("use_locking", False, _TBool),
4963 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4964 "serial_no", "ctime", "mtime", "uuid"]
4965 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4967 "disk_template", "ip", "mac", "bridge",
4968 "nic_mode", "nic_link",
4969 "sda_size", "sdb_size", "vcpus", "tags",
4970 "network_port", "beparams",
4971 r"(disk)\.(size)/([0-9]+)",
4972 r"(disk)\.(sizes)", "disk_usage",
4973 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4974 r"(nic)\.(bridge)/([0-9]+)",
4975 r"(nic)\.(macs|ips|modes|links|bridges)",
4976 r"(disk|nic)\.(count)",
4978 ] + _SIMPLE_FIELDS +
4980 for name in constants.HVS_PARAMETERS
4981 if name not in constants.HVC_GLOBALS] +
4983 for name in constants.BES_PARAMETERS])
4984 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
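# Illustrative output_fields (a sketch): simple fields such as "name" or
# "oper_ram", plus indexed fields matched by the patterns above, e.g.
# "disk.size/0" (size of the first disk) or "nic.mac/1" (MAC of the
# second NIC).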
4987 def CheckArguments(self):
4988 _CheckOutputFields(static=self._FIELDS_STATIC,
4989 dynamic=self._FIELDS_DYNAMIC,
4990 selected=self.op.output_fields)
4992 def ExpandNames(self):
4993 self.needed_locks = {}
4994 self.share_locks[locking.LEVEL_INSTANCE] = 1
4995 self.share_locks[locking.LEVEL_NODE] = 1
4998 self.wanted = _GetWantedInstances(self, self.op.names)
5000 self.wanted = locking.ALL_SET
5002 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5003 self.do_locking = self.do_node_query and self.op.use_locking
5005 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5006 self.needed_locks[locking.LEVEL_NODE] = []
5007 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5009 def DeclareLocks(self, level):
5010 if level == locking.LEVEL_NODE and self.do_locking:
5011 self._LockInstancesNodes()
5013 def Exec(self, feedback_fn):
5014 """Computes the list of instances and their attributes.
5017 # pylint: disable-msg=R0912
5018 # way too many branches here
5019 all_info = self.cfg.GetAllInstancesInfo()
5020 if self.wanted == locking.ALL_SET:
5021 # caller didn't specify instance names, so ordering is not important
5023 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5025 instance_names = all_info.keys()
5026 instance_names = utils.NiceSort(instance_names)
5028 # caller did specify names, so we must keep the ordering
5030 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5032 tgt_set = all_info.keys()
5033 missing = set(self.wanted).difference(tgt_set)
5035 raise errors.OpExecError("Some instances were removed before"
5036 " retrieving their data: %s" % missing)
5037 instance_names = self.wanted
5039 instance_list = [all_info[iname] for iname in instance_names]
5041 # begin data gathering
5043 nodes = frozenset([inst.primary_node for inst in instance_list])
5044 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5048 if self.do_node_query:
5050 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5052 result = node_data[name]
5054 # offline nodes will be in both lists
5055 off_nodes.append(name)
5057 bad_nodes.append(name)
5060 live_data.update(result.payload)
5061 # else no instance is alive
5063 live_data = dict([(name, {}) for name in instance_names])
5065 # end data gathering
5070 cluster = self.cfg.GetClusterInfo()
5071 for instance in instance_list:
5073 i_hv = cluster.FillHV(instance, skip_globals=True)
5074 i_be = cluster.FillBE(instance)
5075 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5076 for field in self.op.output_fields:
5077 st_match = self._FIELDS_STATIC.Matches(field)
5078 if field in self._SIMPLE_FIELDS:
5079 val = getattr(instance, field)
5080 elif field == "pnode":
5081 val = instance.primary_node
5082 elif field == "snodes":
5083 val = list(instance.secondary_nodes)
5084 elif field == "admin_state":
5085 val = instance.admin_up
5086 elif field == "oper_state":
5087 if instance.primary_node in bad_nodes:
5090 val = bool(live_data.get(instance.name))
5091 elif field == "status":
5092 if instance.primary_node in off_nodes:
5093 val = "ERROR_nodeoffline"
5094 elif instance.primary_node in bad_nodes:
5095 val = "ERROR_nodedown"
5097 running = bool(live_data.get(instance.name))
5099 if instance.admin_up:
5104 if instance.admin_up:
5108 elif field == "oper_ram":
5109 if instance.primary_node in bad_nodes:
5111 elif instance.name in live_data:
5112 val = live_data[instance.name].get("memory", "?")
5115 elif field == "vcpus":
5116 val = i_be[constants.BE_VCPUS]
5117 elif field == "disk_template":
5118 val = instance.disk_template
5121 val = instance.nics[0].ip
5124 elif field == "nic_mode":
5126 val = i_nicp[0][constants.NIC_MODE]
5129 elif field == "nic_link":
5131 val = i_nicp[0][constants.NIC_LINK]
5134 elif field == "bridge":
5135 if (instance.nics and
5136 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5137 val = i_nicp[0][constants.NIC_LINK]
5140 elif field == "mac":
5142 val = instance.nics[0].mac
5145 elif field == "sda_size" or field == "sdb_size":
5146 idx = ord(field[2]) - ord('a')
5148 val = instance.FindDisk(idx).size
5149 except errors.OpPrereqError:
5151 elif field == "disk_usage": # total disk usage per node
5152 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5153 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5154 elif field == "tags":
5155 val = list(instance.GetTags())
5156 elif field == "hvparams":
5158 elif (field.startswith(HVPREFIX) and
5159 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5160 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5161 val = i_hv.get(field[len(HVPREFIX):], None)
5162 elif field == "beparams":
5164 elif (field.startswith(BEPREFIX) and
5165 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5166 val = i_be.get(field[len(BEPREFIX):], None)
5167 elif st_match and st_match.groups():
5168 # matches a variable list
5169 st_groups = st_match.groups()
5170 if st_groups and st_groups[0] == "disk":
5171 if st_groups[1] == "count":
5172 val = len(instance.disks)
5173 elif st_groups[1] == "sizes":
5174 val = [disk.size for disk in instance.disks]
5175 elif st_groups[1] == "size":
5177 val = instance.FindDisk(st_groups[2]).size
5178 except errors.OpPrereqError:
5181 assert False, "Unhandled disk parameter"
5182 elif st_groups[0] == "nic":
5183 if st_groups[1] == "count":
5184 val = len(instance.nics)
5185 elif st_groups[1] == "macs":
5186 val = [nic.mac for nic in instance.nics]
5187 elif st_groups[1] == "ips":
5188 val = [nic.ip for nic in instance.nics]
5189 elif st_groups[1] == "modes":
5190 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5191 elif st_groups[1] == "links":
5192 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5193 elif st_groups[1] == "bridges":
5196 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5197 val.append(nicp[constants.NIC_LINK])
5202 nic_idx = int(st_groups[2])
5203 if nic_idx >= len(instance.nics):
5206 if st_groups[1] == "mac":
5207 val = instance.nics[nic_idx].mac
5208 elif st_groups[1] == "ip":
5209 val = instance.nics[nic_idx].ip
5210 elif st_groups[1] == "mode":
5211 val = i_nicp[nic_idx][constants.NIC_MODE]
5212 elif st_groups[1] == "link":
5213 val = i_nicp[nic_idx][constants.NIC_LINK]
5214 elif st_groups[1] == "bridge":
5215 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5216 if nic_mode == constants.NIC_MODE_BRIDGED:
5217 val = i_nicp[nic_idx][constants.NIC_LINK]
5221 assert False, "Unhandled NIC parameter"
5223 assert False, ("Declared but unhandled variable parameter '%s'" %
5226 assert False, "Declared but unhandled parameter '%s'" % field
5233 class LUFailoverInstance(LogicalUnit):
5234 """Failover an instance.
5237 HPATH = "instance-failover"
5238 HTYPE = constants.HTYPE_INSTANCE
5241 ("ignore_consistency", False, _TBool),
5246 def ExpandNames(self):
5247 self._ExpandAndLockInstance()
5248 self.needed_locks[locking.LEVEL_NODE] = []
5249 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5251 def DeclareLocks(self, level):
5252 if level == locking.LEVEL_NODE:
5253 self._LockInstancesNodes()
5255 def BuildHooksEnv(self):
5258 This runs on master, primary and secondary nodes of the instance.
5261 instance = self.instance
5262 source_node = instance.primary_node
5263 target_node = instance.secondary_nodes[0]
5265 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5266 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5267 "OLD_PRIMARY": source_node,
5268 "OLD_SECONDARY": target_node,
5269 "NEW_PRIMARY": target_node,
5270 "NEW_SECONDARY": source_node,
5272 env.update(_BuildInstanceHookEnvByObject(self, instance))
5273 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5275 nl_post.append(source_node)
5276 return env, nl, nl_post
5278 def CheckPrereq(self):
5279 """Check prerequisites.
5281 This checks that the instance is in the cluster.
5284 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5285 assert self.instance is not None, \
5286 "Cannot retrieve locked instance %s" % self.op.instance_name
5288 bep = self.cfg.GetClusterInfo().FillBE(instance)
5289 if instance.disk_template not in constants.DTS_NET_MIRROR:
5290 raise errors.OpPrereqError("Instance's disk layout is not"
5291 " network mirrored, cannot failover.",
5294 secondary_nodes = instance.secondary_nodes
5295 if not secondary_nodes:
5296 raise errors.ProgrammerError("no secondary node but using "
5297 "a mirrored disk template")
5299 target_node = secondary_nodes[0]
5300 _CheckNodeOnline(self, target_node)
5301 _CheckNodeNotDrained(self, target_node)
5302 if instance.admin_up:
5303 # check memory requirements on the secondary node
5304 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5305 instance.name, bep[constants.BE_MEMORY],
5306 instance.hypervisor)
5308 self.LogInfo("Not checking memory on the secondary node as"
5309 " instance will not be started")
5311 # check bridge existence
5312 _CheckInstanceBridgesExist(self, instance, node=target_node)
5314 def Exec(self, feedback_fn):
5315 """Failover an instance.
5317 The failover is done by shutting it down on its present node and
5318 starting it on the secondary.
5321 instance = self.instance
5323 source_node = instance.primary_node
5324 target_node = instance.secondary_nodes[0]
5326 if instance.admin_up:
5327 feedback_fn("* checking disk consistency between source and target")
5328 for dev in instance.disks:
5329 # for drbd, these are drbd over lvm
5330 if not _CheckDiskConsistency(self, dev, target_node, False):
5331 if not self.op.ignore_consistency:
5332 raise errors.OpExecError("Disk %s is degraded on target node,"
5333 " aborting failover." % dev.iv_name)
5335 feedback_fn("* not checking disk consistency as instance is not running")
5337 feedback_fn("* shutting down instance on source node")
5338 logging.info("Shutting down instance %s on node %s",
5339 instance.name, source_node)
5341 result = self.rpc.call_instance_shutdown(source_node, instance,
5342 self.op.shutdown_timeout)
5343 msg = result.fail_msg
5345 if self.op.ignore_consistency:
5346 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5347 " Proceeding anyway. Please make sure node"
5348 " %s is down. Error details: %s",
5349 instance.name, source_node, source_node, msg)
5351 raise errors.OpExecError("Could not shutdown instance %s on"
5353 (instance.name, source_node, msg))
5355 feedback_fn("* deactivating the instance's disks on source node")
5356 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5357 raise errors.OpExecError("Can't shut down the instance's disks.")
5359 instance.primary_node = target_node
5360 # distribute new instance config to the other nodes
5361 self.cfg.Update(instance, feedback_fn)
5363 # Only start the instance if it's marked as up
5364 if instance.admin_up:
5365 feedback_fn("* activating the instance's disks on target node")
5366 logging.info("Starting instance %s on node %s",
5367 instance.name, target_node)
5369 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5370 ignore_secondaries=True)
5372 _ShutdownInstanceDisks(self, instance)
5373 raise errors.OpExecError("Can't activate the instance's disks")
5375 feedback_fn("* starting the instance on the target node")
5376 result = self.rpc.call_instance_start(target_node, instance, None, None)
5377 msg = result.fail_msg
5379 _ShutdownInstanceDisks(self, instance)
5380 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5381 (instance.name, target_node, msg))
5384 class LUMigrateInstance(LogicalUnit):
5385 """Migrate an instance.
5387 This is migration without shutting down, compared to the failover,
5388 which is done with shutdown.
5391 HPATH = "instance-migrate"
5392 HTYPE = constants.HTYPE_INSTANCE
5395 ("live", True, _TBool),
5396 ("cleanup", False, _TBool),
5401 def ExpandNames(self):
5402 self._ExpandAndLockInstance()
5404 self.needed_locks[locking.LEVEL_NODE] = []
5405 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5407 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5408 self.op.live, self.op.cleanup)
5409 self.tasklets = [self._migrater]
5411 def DeclareLocks(self, level):
5412 if level == locking.LEVEL_NODE:
5413 self._LockInstancesNodes()
5415 def BuildHooksEnv(self):
5418 This runs on master, primary and secondary nodes of the instance.
5421 instance = self._migrater.instance
5422 source_node = instance.primary_node
5423 target_node = instance.secondary_nodes[0]
5424 env = _BuildInstanceHookEnvByObject(self, instance)
5425 env["MIGRATE_LIVE"] = self.op.live
5426 env["MIGRATE_CLEANUP"] = self.op.cleanup
5428 "OLD_PRIMARY": source_node,
5429 "OLD_SECONDARY": target_node,
5430 "NEW_PRIMARY": target_node,
5431 "NEW_SECONDARY": source_node,
5433 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5435 nl_post.append(source_node)
5436 return env, nl, nl_post
5439 class LUMoveInstance(LogicalUnit):
5440 """Move an instance by data-copying.
5443 HPATH = "instance-move"
5444 HTYPE = constants.HTYPE_INSTANCE
5447 ("target_node", _NoDefault, _TNonEmptyString),
5452 def ExpandNames(self):
5453 self._ExpandAndLockInstance()
5454 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5455 self.op.target_node = target_node
5456 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5457 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5459 def DeclareLocks(self, level):
5460 if level == locking.LEVEL_NODE:
5461 self._LockInstancesNodes(primary_only=True)
5463 def BuildHooksEnv(self):
5466 This runs on master, primary and secondary nodes of the instance.
5470 "TARGET_NODE": self.op.target_node,
5471 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5473 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5474 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5475 self.op.target_node]
5478 def CheckPrereq(self):
5479 """Check prerequisites.
5481 This checks that the instance is in the cluster.
5484 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5485 assert self.instance is not None, \
5486 "Cannot retrieve locked instance %s" % self.op.instance_name
5488 node = self.cfg.GetNodeInfo(self.op.target_node)
5489 assert node is not None, \
5490 "Cannot retrieve locked node %s" % self.op.target_node
5492 self.target_node = target_node = node.name
5494 if target_node == instance.primary_node:
5495 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5496 (instance.name, target_node),
5499 bep = self.cfg.GetClusterInfo().FillBE(instance)
5501 for idx, dsk in enumerate(instance.disks):
5502 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5503 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5504 " cannot copy" % idx, errors.ECODE_STATE)
5506 _CheckNodeOnline(self, target_node)
5507 _CheckNodeNotDrained(self, target_node)
5509 if instance.admin_up:
5510 # check memory requirements on the secondary node
5511 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5512 instance.name, bep[constants.BE_MEMORY],
5513 instance.hypervisor)
5515 self.LogInfo("Not checking memory on the secondary node as"
5516 " instance will not be started")
5518 # check bridge existence
5519 _CheckInstanceBridgesExist(self, instance, node=target_node)
5521 def Exec(self, feedback_fn):
5522 """Move an instance.
5524 The move is done by shutting it down on its present node, copying
5525 the data over (slow) and starting it on the new node.
5528 instance = self.instance
5530 source_node = instance.primary_node
5531 target_node = self.target_node
5533 self.LogInfo("Shutting down instance %s on source node %s",
5534 instance.name, source_node)
5536 result = self.rpc.call_instance_shutdown(source_node, instance,
5537 self.op.shutdown_timeout)
5538 msg = result.fail_msg
5540 if self.op.ignore_consistency:
5541 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5542 " Proceeding anyway. Please make sure node"
5543 " %s is down. Error details: %s",
5544 instance.name, source_node, source_node, msg)
5546 raise errors.OpExecError("Could not shutdown instance %s on"
5548 (instance.name, source_node, msg))
5550 # create the target disks
5552 _CreateDisks(self, instance, target_node=target_node)
5553 except errors.OpExecError:
5554 self.LogWarning("Device creation failed, reverting...")
5556 _RemoveDisks(self, instance, target_node=target_node)
5558 self.cfg.ReleaseDRBDMinors(instance.name)
5561 cluster_name = self.cfg.GetClusterInfo().cluster_name
5564 # activate, get path, copy the data over
5565 for idx, disk in enumerate(instance.disks):
5566 self.LogInfo("Copying data for disk %d", idx)
5567 result = self.rpc.call_blockdev_assemble(target_node, disk,
5568 instance.name, True)
5570 self.LogWarning("Can't assemble newly created disk %d: %s",
5571 idx, result.fail_msg)
5572 errs.append(result.fail_msg)
5574 dev_path = result.payload
5575 result = self.rpc.call_blockdev_export(source_node, disk,
5576 target_node, dev_path,
5579 self.LogWarning("Can't copy data over for disk %d: %s",
5580 idx, result.fail_msg)
5581 errs.append(result.fail_msg)
5585 self.LogWarning("Some disks failed to copy, aborting")
5587 _RemoveDisks(self, instance, target_node=target_node)
5589 self.cfg.ReleaseDRBDMinors(instance.name)
5590 raise errors.OpExecError("Errors during disk copy: %s" %
5593 instance.primary_node = target_node
5594 self.cfg.Update(instance, feedback_fn)
5596 self.LogInfo("Removing the disks on the original node")
5597 _RemoveDisks(self, instance, target_node=source_node)
5599 # Only start the instance if it's marked as up
5600 if instance.admin_up:
5601 self.LogInfo("Starting instance %s on node %s",
5602 instance.name, target_node)
5604 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5605 ignore_secondaries=True)
5607 _ShutdownInstanceDisks(self, instance)
5608 raise errors.OpExecError("Can't activate the instance's disks")
5610 result = self.rpc.call_instance_start(target_node, instance, None, None)
5611 msg = result.fail_msg
5613 _ShutdownInstanceDisks(self, instance)
5614 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5615 (instance.name, target_node, msg))
5618 class LUMigrateNode(LogicalUnit):
5619 """Migrate all instances from a node.
5622 HPATH = "node-migrate"
5623 HTYPE = constants.HTYPE_NODE
5626 ("live", False, _TBool),
5630 def ExpandNames(self):
5631 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5633 self.needed_locks = {
5634 locking.LEVEL_NODE: [self.op.node_name],
5637 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5639 # Create tasklets for migrating instances for all instances on this node
5643 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5644 logging.debug("Migrating instance %s", inst.name)
5645 names.append(inst.name)
5647 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5649 self.tasklets = tasklets
5651 # Declare instance locks
5652 self.needed_locks[locking.LEVEL_INSTANCE] = names
5654 def DeclareLocks(self, level):
5655 if level == locking.LEVEL_NODE:
5656 self._LockInstancesNodes()
5658 def BuildHooksEnv(self):
5661 This runs on the master, the primary and all the secondaries.
5665 "NODE_NAME": self.op.node_name,
5668 nl = [self.cfg.GetMasterNode()]
5670 return (env, nl, nl)
5673 class TLMigrateInstance(Tasklet):
5674 def __init__(self, lu, instance_name, live, cleanup):
5675 """Initializes this class.
5678 Tasklet.__init__(self, lu)
5681 self.instance_name = instance_name
5682 self.live = live
5683 self.cleanup = cleanup
5685 def CheckPrereq(self):
5686 """Check prerequisites.
5688 This checks that the instance is in the cluster.
5691 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5692 instance = self.cfg.GetInstanceInfo(instance_name)
5693 assert instance is not None
5695 if instance.disk_template != constants.DT_DRBD8:
5696 raise errors.OpPrereqError("Instance's disk layout is not"
5697 " drbd8, cannot migrate.", errors.ECODE_STATE)
5699 secondary_nodes = instance.secondary_nodes
5700 if not secondary_nodes:
5701 raise errors.ConfigurationError("No secondary node but using"
5702 " drbd8 disk template")
5704 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5706 target_node = secondary_nodes[0]
5707 # check memory requirements on the secondary node
5708 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5709 instance.name, i_be[constants.BE_MEMORY],
5710 instance.hypervisor)
5712 # check bridge existence
5713 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5715 if not self.cleanup:
5716 _CheckNodeNotDrained(self.lu, target_node)
5717 result = self.rpc.call_instance_migratable(instance.primary_node,
5719 result.Raise("Can't migrate, please use failover",
5720 prereq=True, ecode=errors.ECODE_STATE)
5722 self.instance = instance
5724 def _WaitUntilSync(self):
5725 """Poll with custom rpc for disk sync.
5727 This uses our own step-based rpc call.
5730 self.feedback_fn("* wait until resync is done")
5734 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5735 self.nodes_ip,
5736 self.instance.disks)
5738 for node, nres in result.items():
5739 nres.Raise("Cannot resync disks on node %s" % node)
5740 node_done, node_percent = nres.payload
5741 all_done = all_done and node_done
5742 if node_percent is not None:
5743 min_percent = min(min_percent, node_percent)
5745 if min_percent < 100:
5746 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5749 def _EnsureSecondary(self, node):
5750 """Demote a node to secondary.
5753 self.feedback_fn("* switching node %s to secondary mode" % node)
5755 for dev in self.instance.disks:
5756 self.cfg.SetDiskID(dev, node)
5758 result = self.rpc.call_blockdev_close(node, self.instance.name,
5759 self.instance.disks)
5760 result.Raise("Cannot change disk to secondary on node %s" % node)
5762 def _GoStandalone(self):
5763 """Disconnect from the network.
5766 self.feedback_fn("* changing into standalone mode")
5767 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5768 self.instance.disks)
5769 for node, nres in result.items():
5770 nres.Raise("Cannot disconnect disks node %s" % node)
5772 def _GoReconnect(self, multimaster):
5773 """Reconnect to the network.
5779 msg = "single-master"
5780 self.feedback_fn("* changing disks into %s mode" % msg)
5781 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5782 self.instance.disks,
5783 self.instance.name, multimaster)
5784 for node, nres in result.items():
5785 nres.Raise("Cannot change disks config on node %s" % node)
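# Illustrative sequence (a sketch mirroring _ExecMigration below) of how
# the helpers above are combined for a live migration:
#
#   self._EnsureSecondary(target_node)  # demote the target's disks
#   self._GoStandalone()                # drop the DRBD network configuration
#   self._GoReconnect(True)             # reconnect in multi-master mode
#   self._WaitUntilSync()               # wait for resync before migrating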
5787 def _ExecCleanup(self):
5788 """Try to cleanup after a failed migration.
5790 The cleanup is done by:
5791 - check that the instance is running only on one node
5792 (and update the config if needed)
5793 - change disks on its secondary node to secondary
5794 - wait until disks are fully synchronized
5795 - disconnect from the network
5796 - change disks into single-master mode
5797 - wait again until disks are fully synchronized
5800 instance = self.instance
5801 target_node = self.target_node
5802 source_node = self.source_node
5804 # check running on only one node
5805 self.feedback_fn("* checking where the instance actually runs"
5806 " (if this hangs, the hypervisor might be in"
5807 " a bad state)")
5808 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5809 for node, result in ins_l.items():
5810 result.Raise("Can't contact node %s" % node)
5812 runningon_source = instance.name in ins_l[source_node].payload
5813 runningon_target = instance.name in ins_l[target_node].payload
5815 if runningon_source and runningon_target:
5816 raise errors.OpExecError("Instance seems to be running on two nodes,"
5817 " or the hypervisor is confused. You will have"
5818 " to ensure manually that it runs only on one"
5819 " and restart this operation.")
5821 if not (runningon_source or runningon_target):
5822 raise errors.OpExecError("Instance does not seem to be running at all."
5823 " In this case, it's safer to repair by"
5824 " running 'gnt-instance stop' to ensure disk"
5825 " shutdown, and then restarting it.")
5827 if runningon_target:
5828 # the migration has actually succeeded, we need to update the config
5829 self.feedback_fn("* instance running on secondary node (%s),"
5830 " updating config" % target_node)
5831 instance.primary_node = target_node
5832 self.cfg.Update(instance, self.feedback_fn)
5833 demoted_node = source_node
5835 self.feedback_fn("* instance confirmed to be running on its"
5836 " primary node (%s)" % source_node)
5837 demoted_node = target_node
5839 self._EnsureSecondary(demoted_node)
5841 self._WaitUntilSync()
5842 except errors.OpExecError:
5843 # we ignore errors here, since if the device is standalone, it
5844 # won't be able to sync
5846 self._GoStandalone()
5847 self._GoReconnect(False)
5848 self._WaitUntilSync()
5850 self.feedback_fn("* done")
5852 def _RevertDiskStatus(self):
5853 """Try to revert the disk status after a failed migration.
5856 target_node = self.target_node
5858 self._EnsureSecondary(target_node)
5859 self._GoStandalone()
5860 self._GoReconnect(False)
5861 self._WaitUntilSync()
5862 except errors.OpExecError, err:
5863 self.lu.LogWarning("Migration failed and I can't reconnect the"
5864 " drives: error '%s'\n"
5865 "Please look and recover the instance status" %
5868 def _AbortMigration(self):
5869 """Call the hypervisor code to abort a started migration.
5872 instance = self.instance
5873 target_node = self.target_node
5874 migration_info = self.migration_info
5876 abort_result = self.rpc.call_finalize_migration(target_node,
5880 abort_msg = abort_result.fail_msg
5882 logging.error("Aborting migration failed on target node %s: %s",
5883 target_node, abort_msg)
5884 # Don't raise an exception here, as we still have to try to revert the
5885 # disk status, even if this step failed.
5887 def _ExecMigration(self):
5888 """Migrate an instance.
5890 The migration is done by:
5891 - change the disks into dual-master mode
5892 - wait until disks are fully synchronized again
5893 - migrate the instance
5894 - change disks on the new secondary node (the old primary) to secondary
5895 - wait until disks are fully synchronized
5896 - change disks into single-master mode
5899 instance = self.instance
5900 target_node = self.target_node
5901 source_node = self.source_node
5903 self.feedback_fn("* checking disk consistency between source and target")
5904 for dev in instance.disks:
5905 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5906 raise errors.OpExecError("Disk %s is degraded or not fully"
5907 " synchronized on target node,"
5908 " aborting migrate." % dev.iv_name)
5910 # First get the migration information from the remote node
5911 result = self.rpc.call_migration_info(source_node, instance)
5912 msg = result.fail_msg
5914 log_err = ("Failed fetching source migration information from %s: %s" %
5916 logging.error(log_err)
5917 raise errors.OpExecError(log_err)
5919 self.migration_info = migration_info = result.payload
5921 # Then switch the disks to master/master mode
5922 self._EnsureSecondary(target_node)
5923 self._GoStandalone()
5924 self._GoReconnect(True)
5925 self._WaitUntilSync()
5927 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5928 result = self.rpc.call_accept_instance(target_node,
5931 self.nodes_ip[target_node])
5933 msg = result.fail_msg
5935 logging.error("Instance pre-migration failed, trying to revert"
5936 " disk status: %s", msg)
5937 self.feedback_fn("Pre-migration failed, aborting")
5938 self._AbortMigration()
5939 self._RevertDiskStatus()
5940 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5941 (instance.name, msg))
5943 self.feedback_fn("* migrating instance to %s" % target_node)
5945 result = self.rpc.call_instance_migrate(source_node, instance,
5946 self.nodes_ip[target_node],
5948 msg = result.fail_msg
5950 logging.error("Instance migration failed, trying to revert"
5951 " disk status: %s", msg)
5952 self.feedback_fn("Migration failed, aborting")
5953 self._AbortMigration()
5954 self._RevertDiskStatus()
5955 raise errors.OpExecError("Could not migrate instance %s: %s" %
5956 (instance.name, msg))
5959 instance.primary_node = target_node
5960 # distribute new instance config to the other nodes
5961 self.cfg.Update(instance, self.feedback_fn)
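# Added note (not in the original source): the finalize_migration call below
# notifies the target node that the migration has completed, so that its
# hypervisor can finalize (or clean up) the incoming instance.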
5963 result = self.rpc.call_finalize_migration(target_node,
5967 msg = result.fail_msg
5969 logging.error("Instance migration succeeded, but finalization failed:"
5971 raise errors.OpExecError("Could not finalize instance migration: %s" %
5974 self._EnsureSecondary(source_node)
5975 self._WaitUntilSync()
5976 self._GoStandalone()
5977 self._GoReconnect(False)
5978 self._WaitUntilSync()
5980 self.feedback_fn("* done")
5982 def Exec(self, feedback_fn):
5983 """Perform the migration.
5986 feedback_fn("Migrating instance %s" % self.instance.name)
5988 self.feedback_fn = feedback_fn
5990 self.source_node = self.instance.primary_node
5991 self.target_node = self.instance.secondary_nodes[0]
5992 self.all_nodes = [self.source_node, self.target_node]
5994 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5995 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5999 return self._ExecCleanup()
6001 return self._ExecMigration()
6004 def _CreateBlockDev(lu, node, instance, device, force_create,
6006 """Create a tree of block devices on a given node.
6008 If this device type has to be created on secondaries, create it and all its children.
6011 If not, just recurse to children keeping the same 'force' value.
6013 @param lu: the lu on whose behalf we execute
6014 @param node: the node on which to create the device
6015 @type instance: L{objects.Instance}
6016 @param instance: the instance which owns the device
6017 @type device: L{objects.Disk}
6018 @param device: the device to create
6019 @type force_create: boolean
6020 @param force_create: whether to force creation of this device; this
6021 will be changed to True whenever we find a device which has the
6022 CreateOnSecondary() attribute
6023 @param info: the extra 'metadata' we should attach to the device
6024 (this will be represented as a LVM tag)
6025 @type force_open: boolean
6026 @param force_open: this parameter will be passed to the
6027 L{backend.BlockdevCreate} function where it specifies
6028 whether we run on primary or not, and it affects both
6029 the child assembly and the device's own Open() execution
6032 if device.CreateOnSecondary():
6036 for child in device.children:
6037 _CreateBlockDev(lu, node, instance, child, force_create,
6040 if not force_create:
6043 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6046 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6047 """Create a single block device on a given node.
6049 This will not recurse over children of the device, so they must be
6052 @param lu: the lu on whose behalf we execute
6053 @param node: the node on which to create the device
6054 @type instance: L{objects.Instance}
6055 @param instance: the instance which owns the device
6056 @type device: L{objects.Disk}
6057 @param device: the device to create
6058 @param info: the extra 'metadata' we should attach to the device
6059 (this will be represented as a LVM tag)
6060 @type force_open: boolean
6061 @param force_open: this parameter will be passed to the
6062 L{backend.BlockdevCreate} function where it specifies
6063 whether we run on primary or not, and it affects both
6064 the child assembly and the device's own Open() execution
6067 lu.cfg.SetDiskID(device, node)
6068 result = lu.rpc.call_blockdev_create(node, device, device.size,
6069 instance.name, force_open, info)
6070 result.Raise("Can't create block device %s on"
6071 " node %s for instance %s" % (device, node, instance.name))
6072 if device.physical_id is None:
6073 device.physical_id = result.payload
6076 def _GenerateUniqueNames(lu, exts):
6077 """Generate a suitable LV name.
6079 This will generate a logical volume name for the given instance.
6084 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6085 results.append("%s%s" % (new_id, val))
6089 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6091 """Generate a drbd8 device complete with its children.
6094 port = lu.cfg.AllocatePort()
6095 vgname = lu.cfg.GetVGName()
6096 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6097 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6098 logical_id=(vgname, names[0]))
6099 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6100 logical_id=(vgname, names[1]))
6101 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6102 logical_id=(primary, secondary, port,
6105 children=[dev_data, dev_meta],
6110 def _GenerateDiskTemplate(lu, template_name,
6111 instance_name, primary_node,
6112 secondary_nodes, disk_info,
6113 file_storage_dir, file_driver,
6115 """Generate the entire disk layout for a given template type.
6118 #TODO: compute space requirements
6120 vgname = lu.cfg.GetVGName()
6121 disk_count = len(disk_info)
6123 if template_name == constants.DT_DISKLESS:
6125 elif template_name == constants.DT_PLAIN:
6126 if len(secondary_nodes) != 0:
6127 raise errors.ProgrammerError("Wrong template configuration")
6129 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6130 for i in range(disk_count)])
6131 for idx, disk in enumerate(disk_info):
6132 disk_index = idx + base_index
6133 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6134 logical_id=(vgname, names[idx]),
6135 iv_name="disk/%d" % disk_index,
6137 disks.append(disk_dev)
6138 elif template_name == constants.DT_DRBD8:
6139 if len(secondary_nodes) != 1:
6140 raise errors.ProgrammerError("Wrong template configuration")
6141 remote_node = secondary_nodes[0]
6142 minors = lu.cfg.AllocateDRBDMinor(
6143 [primary_node, remote_node] * len(disk_info), instance_name)
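# Added note (not in the original source): the minors list returned above
# alternates primary/secondary entries, one pair per disk, which is why the
# loop below indexes it as minors[idx*2] and minors[idx*2+1].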
6146 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6147 for i in range(disk_count)]):
6148 names.append(lv_prefix + "_data")
6149 names.append(lv_prefix + "_meta")
6150 for idx, disk in enumerate(disk_info):
6151 disk_index = idx + base_index
6152 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6153 disk["size"], names[idx*2:idx*2+2],
6154 "disk/%d" % disk_index,
6155 minors[idx*2], minors[idx*2+1])
6156 disk_dev.mode = disk["mode"]
6157 disks.append(disk_dev)
6158 elif template_name == constants.DT_FILE:
6159 if len(secondary_nodes) != 0:
6160 raise errors.ProgrammerError("Wrong template configuration")
6162 _RequireFileStorage()
6164 for idx, disk in enumerate(disk_info):
6165 disk_index = idx + base_index
6166 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6167 iv_name="disk/%d" % disk_index,
6168 logical_id=(file_driver,
6169 "%s/disk%d" % (file_storage_dir,
6172 disks.append(disk_dev)
6174 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6178 def _GetInstanceInfoText(instance):
6179 """Compute that text that should be added to the disk's metadata.
6182 return "originstname+%s" % instance.name
6185 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6186 """Create all disks for an instance.
6188 This abstracts away some work from AddInstance.
6190 @type lu: L{LogicalUnit}
6191 @param lu: the logical unit on whose behalf we execute
6192 @type instance: L{objects.Instance}
6193 @param instance: the instance whose disks we should create
6195 @param to_skip: list of indices to skip
6196 @type target_node: string
6197 @param target_node: if passed, overrides the target node for creation
6199 @return: the success of the creation
6202 info = _GetInstanceInfoText(instance)
6203 if target_node is None:
6204 pnode = instance.primary_node
6205 all_nodes = instance.all_nodes
6210 if instance.disk_template == constants.DT_FILE:
6211 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6212 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6214 result.Raise("Failed to create directory '%s' on"
6215 " node %s" % (file_storage_dir, pnode))
6217 # Note: this needs to be kept in sync with adding of disks in
6218 # LUSetInstanceParams
6219 for idx, device in enumerate(instance.disks):
6220 if to_skip and idx in to_skip:
6222 logging.info("Creating volume %s for instance %s",
6223 device.iv_name, instance.name)
6225 for node in all_nodes:
6226 f_create = node == pnode
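# Added note (not in the original source): f_create is passed below both as
# force_create and as force_open, so creation and opening are only forced on
# the primary node; on the other nodes _CreateBlockDev only creates the
# device types that need to exist on secondaries.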
6227 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6230 def _RemoveDisks(lu, instance, target_node=None):
6231 """Remove all disks for an instance.
6233 This abstracts away some work from `AddInstance()` and
6234 `RemoveInstance()`. Note that in case some of the devices couldn't
6235 be removed, the removal will continue with the other ones (compare
6236 with `_CreateDisks()`).
6238 @type lu: L{LogicalUnit}
6239 @param lu: the logical unit on whose behalf we execute
6240 @type instance: L{objects.Instance}
6241 @param instance: the instance whose disks we should remove
6242 @type target_node: string
6243 @param target_node: used to override the node on which to remove the disks
6245 @return: the success of the removal
6248 logging.info("Removing block devices for instance %s", instance.name)
6251 for device in instance.disks:
6253 edata = [(target_node, device)]
6255 edata = device.ComputeNodeTree(instance.primary_node)
6256 for node, disk in edata:
6257 lu.cfg.SetDiskID(disk, node)
6258 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6260 lu.LogWarning("Could not remove block device %s on node %s,"
6261 " continuing anyway: %s", device.iv_name, node, msg)
6264 if instance.disk_template == constants.DT_FILE:
6265 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6269 tgt = instance.primary_node
6270 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6272 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6273 file_storage_dir, instance.primary_node, result.fail_msg)
6279 def _ComputeDiskSize(disk_template, disks):
6280 """Compute disk size requirements in the volume group
6283 # Required free disk space as a function of the disk template and disk sizes
6285 constants.DT_DISKLESS: None,
6286 constants.DT_PLAIN: sum(d["size"] for d in disks),
6287 # 128 MB are added for drbd metadata for each disk
6288 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6289 constants.DT_FILE: None,
6292 if disk_template not in req_size_dict:
6293 raise errors.ProgrammerError("Disk template '%s' size requirement"
6294 " is unknown" % disk_template)
6296 return req_size_dict[disk_template]
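# Worked example (added, not in the original source): for two DRBD8 disks of
# 10240 MB each the requirement is (10240 + 128) + (10240 + 128) = 20736 MB,
# since 128 MB of DRBD metadata is added per disk; diskless and file-based
# templates need no volume group space (None).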
6299 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6300 """Hypervisor parameter validation.
6302 This function abstracts the hypervisor parameter validation to be
6303 used in both instance create and instance modify.
6305 @type lu: L{LogicalUnit}
6306 @param lu: the logical unit for which we check
6307 @type nodenames: list
6308 @param nodenames: the list of nodes on which we should check
6309 @type hvname: string
6310 @param hvname: the name of the hypervisor we should use
6311 @type hvparams: dict
6312 @param hvparams: the parameters which we need to check
6313 @raise errors.OpPrereqError: if the parameters are not valid
6316 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6319 for node in nodenames:
6323 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6326 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6327 """OS parameters validation.
6329 @type lu: L{LogicalUnit}
6330 @param lu: the logical unit for which we check
6331 @type required: boolean
6332 @param required: whether the validation should fail if the OS is not
6334 @type nodenames: list
6335 @param nodenames: the list of nodes on which we should check
6336 @type osname: string
6337 @param osname: the name of the OS we should use
6338 @type osparams: dict
6339 @param osparams: the parameters which we need to check
6340 @raise errors.OpPrereqError: if the parameters are not valid
6343 result = lu.rpc.call_os_validate(required, nodenames, osname,
6344 [constants.OS_VALIDATE_PARAMETERS],
6346 for node, nres in result.items():
6347 # we don't check for offline cases since this should be run only
6348 # against the master node and/or an instance's nodes
6349 nres.Raise("OS Parameters validation failed on node %s" % node)
6350 if not nres.payload:
6351 lu.LogInfo("OS %s not found on node %s, validation skipped",
6355 class LUCreateInstance(LogicalUnit):
6356 """Create an instance.
6359 HPATH = "instance-add"
6360 HTYPE = constants.HTYPE_INSTANCE
6363 ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6364 ("start", True, _TBool),
6365 ("wait_for_sync", True, _TBool),
6366 ("ip_check", True, _TBool),
6367 ("name_check", True, _TBool),
6368 ("disks", _NoDefault, _TListOf(_TDict)),
6369 ("nics", _NoDefault, _TListOf(_TDict)),
6370 ("hvparams", _EmptyDict, _TDict),
6371 ("beparams", _EmptyDict, _TDict),
6372 ("osparams", _EmptyDict, _TDict),
6373 ("no_install", None, _TMaybeBool),
6374 ("os_type", None, _TMaybeString),
6375 ("force_variant", False, _TBool),
6376 ("source_handshake", None, _TOr(_TList, _TNone)),
6377 ("source_x509_ca", None, _TOr(_TList, _TNone)),
6378 ("source_instance_name", None, _TMaybeString),
6379 ("src_node", None, _TMaybeString),
6380 ("src_path", None, _TMaybeString),
6381 ("pnode", None, _TMaybeString),
6382 ("snode", None, _TMaybeString),
6383 ("iallocator", None, _TMaybeString),
6384 ("hypervisor", None, _TMaybeString),
6385 ("disk_template", _NoDefault, _CheckDiskTemplate),
6386 ("identify_defaults", False, _TBool),
6387 ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6388 ("file_storage_dir", None, _TMaybeString),
6389 ("dry_run", False, _TBool),
6393 def CheckArguments(self):
6397 # do not require name_check to ease forward/backward compatibility
6399 if self.op.no_install and self.op.start:
6400 self.LogInfo("No-installation mode selected, disabling startup")
6401 self.op.start = False
6402 # validate/normalize the instance name
6403 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6404 if self.op.ip_check and not self.op.name_check:
6405 # TODO: make the ip check more flexible and not depend on the name check
6406 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6409 # check nics' parameter names
6410 for nic in self.op.nics:
6411 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6413 # check disks: parameter names and consistent adopt/no-adopt strategy
6414 has_adopt = has_no_adopt = False
6415 for disk in self.op.disks:
6416 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6421 if has_adopt and has_no_adopt:
6422 raise errors.OpPrereqError("Either all disks are adopted or none is",
6425 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6426 raise errors.OpPrereqError("Disk adoption is not supported for the"
6427 " '%s' disk template" %
6428 self.op.disk_template,
6430 if self.op.iallocator is not None:
6431 raise errors.OpPrereqError("Disk adoption not allowed with an"
6432 " iallocator script", errors.ECODE_INVAL)
6433 if self.op.mode == constants.INSTANCE_IMPORT:
6434 raise errors.OpPrereqError("Disk adoption not allowed for"
6435 " instance import", errors.ECODE_INVAL)
6437 self.adopt_disks = has_adopt
6439 # instance name verification
6440 if self.op.name_check:
6441 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6442 self.op.instance_name = self.hostname1.name
6443 # used in CheckPrereq for ip ping check
6444 self.check_ip = self.hostname1.ip
6445 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6446 raise errors.OpPrereqError("Remote imports require names to be checked" %
6449 self.check_ip = None
6451 # file storage checks
6452 if (self.op.file_driver and
6453 not self.op.file_driver in constants.FILE_DRIVER):
6454 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6455 self.op.file_driver, errors.ECODE_INVAL)
6457 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6458 raise errors.OpPrereqError("File storage directory path not absolute",
6461 ### Node/iallocator related checks
6462 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6463 raise errors.OpPrereqError("One and only one of iallocator and primary"
6464 " node must be given",
6467 self._cds = _GetClusterDomainSecret()
6469 if self.op.mode == constants.INSTANCE_IMPORT:
6470 # On import force_variant must be True, because if we forced it at
6471 # initial install, our only chance when importing it back is that it
6473 self.op.force_variant = True
6475 if self.op.no_install:
6476 self.LogInfo("No-installation mode has no effect during import")
6478 elif self.op.mode == constants.INSTANCE_CREATE:
6479 if self.op.os_type is None:
6480 raise errors.OpPrereqError("No guest OS specified",
6482 if self.op.disk_template is None:
6483 raise errors.OpPrereqError("No disk template specified",
6486 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6487 # Check handshake to ensure both clusters have the same domain secret
6488 src_handshake = self.op.source_handshake
6489 if not src_handshake:
6490 raise errors.OpPrereqError("Missing source handshake",
6493 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6496 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6499 # Load and check source CA
6500 self.source_x509_ca_pem = self.op.source_x509_ca
6501 if not self.source_x509_ca_pem:
6502 raise errors.OpPrereqError("Missing source X509 CA",
6506 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6508 except OpenSSL.crypto.Error, err:
6509 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6510 (err, ), errors.ECODE_INVAL)
6512 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6513 if errcode is not None:
6514 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6517 self.source_x509_ca = cert
6519 src_instance_name = self.op.source_instance_name
6520 if not src_instance_name:
6521 raise errors.OpPrereqError("Missing source instance name",
6524 self.source_instance_name = \
6525 utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6528 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6529 self.op.mode, errors.ECODE_INVAL)
6531 def ExpandNames(self):
6532 """ExpandNames for CreateInstance.
6534 Figure out the right locks for instance creation.
6537 self.needed_locks = {}
6539 instance_name = self.op.instance_name
6540 # this is just a preventive check, but someone might still add this
6541 # instance in the meantime, and creation will fail at lock-add time
6542 if instance_name in self.cfg.GetInstanceList():
6543 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6544 instance_name, errors.ECODE_EXISTS)
6546 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6548 if self.op.iallocator:
6549 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6551 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6552 nodelist = [self.op.pnode]
6553 if self.op.snode is not None:
6554 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6555 nodelist.append(self.op.snode)
6556 self.needed_locks[locking.LEVEL_NODE] = nodelist
6558 # in case of import lock the source node too
6559 if self.op.mode == constants.INSTANCE_IMPORT:
6560 src_node = self.op.src_node
6561 src_path = self.op.src_path
6563 if src_path is None:
6564 self.op.src_path = src_path = self.op.instance_name
6566 if src_node is None:
6567 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6568 self.op.src_node = None
6569 if os.path.isabs(src_path):
6570 raise errors.OpPrereqError("Importing an instance from an absolute"
6571 " path requires a source node option.",
6574 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6575 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6576 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6577 if not os.path.isabs(src_path):
6578 self.op.src_path = src_path = \
6579 utils.PathJoin(constants.EXPORT_DIR, src_path)
6581 def _RunAllocator(self):
6582 """Run the allocator based on input opcode.
6585 nics = [n.ToDict() for n in self.nics]
6586 ial = IAllocator(self.cfg, self.rpc,
6587 mode=constants.IALLOCATOR_MODE_ALLOC,
6588 name=self.op.instance_name,
6589 disk_template=self.op.disk_template,
6592 vcpus=self.be_full[constants.BE_VCPUS],
6593 mem_size=self.be_full[constants.BE_MEMORY],
6596 hypervisor=self.op.hypervisor,
6599 ial.Run(self.op.iallocator)
6602 raise errors.OpPrereqError("Can't compute nodes using"
6603 " iallocator '%s': %s" %
6604 (self.op.iallocator, ial.info),
6606 if len(ial.result) != ial.required_nodes:
6607 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6608 " of nodes (%s), required %s" %
6609 (self.op.iallocator, len(ial.result),
6610 ial.required_nodes), errors.ECODE_FAULT)
6611 self.op.pnode = ial.result[0]
6612 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6613 self.op.instance_name, self.op.iallocator,
6614 utils.CommaJoin(ial.result))
6615 if ial.required_nodes == 2:
6616 self.op.snode = ial.result[1]
6618 def BuildHooksEnv(self):
6621 This runs on master, primary and secondary nodes of the instance.
6625 "ADD_MODE": self.op.mode,
6627 if self.op.mode == constants.INSTANCE_IMPORT:
6628 env["SRC_NODE"] = self.op.src_node
6629 env["SRC_PATH"] = self.op.src_path
6630 env["SRC_IMAGES"] = self.src_images
6632 env.update(_BuildInstanceHookEnv(
6633 name=self.op.instance_name,
6634 primary_node=self.op.pnode,
6635 secondary_nodes=self.secondaries,
6636 status=self.op.start,
6637 os_type=self.op.os_type,
6638 memory=self.be_full[constants.BE_MEMORY],
6639 vcpus=self.be_full[constants.BE_VCPUS],
6640 nics=_NICListToTuple(self, self.nics),
6641 disk_template=self.op.disk_template,
6642 disks=[(d["size"], d["mode"]) for d in self.disks],
6645 hypervisor_name=self.op.hypervisor,
6648 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6652 def _ReadExportInfo(self):
6653 """Reads the export information from disk.
6655 It will override the opcode source node and path with the actual
6656 information, if these two were not specified before.
6658 @return: the export information
6661 assert self.op.mode == constants.INSTANCE_IMPORT
6663 src_node = self.op.src_node
6664 src_path = self.op.src_path
6666 if src_node is None:
6667 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6668 exp_list = self.rpc.call_export_list(locked_nodes)
6670 for node in exp_list:
6671 if exp_list[node].fail_msg:
6673 if src_path in exp_list[node].payload:
6675 self.op.src_node = src_node = node
6676 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6680 raise errors.OpPrereqError("No export found for relative path %s" %
6681 src_path, errors.ECODE_INVAL)
6683 _CheckNodeOnline(self, src_node)
6684 result = self.rpc.call_export_info(src_node, src_path)
6685 result.Raise("No export or invalid export found in dir %s" % src_path)
6687 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6688 if not export_info.has_section(constants.INISECT_EXP):
6689 raise errors.ProgrammerError("Corrupted export config",
6690 errors.ECODE_ENVIRON)
6692 ei_version = export_info.get(constants.INISECT_EXP, "version")
6693 if (int(ei_version) != constants.EXPORT_VERSION):
6694 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6695 (ei_version, constants.EXPORT_VERSION),
6696 errors.ECODE_ENVIRON)
6699 def _ReadExportParams(self, einfo):
6700 """Use export parameters as defaults.
6702 In case the opcode doesn't specify (as in override) some instance
6703 parameters, then try to use them from the export information, if
6707 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6709 if self.op.disk_template is None:
6710 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6711 self.op.disk_template = einfo.get(constants.INISECT_INS,
6714 raise errors.OpPrereqError("No disk template specified and the export"
6715 " is missing the disk_template information",
6718 if not self.op.disks:
6719 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6721 # TODO: import the disk iv_name too
6722 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6723 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6724 disks.append({"size": disk_sz})
6725 self.op.disks = disks
6727 raise errors.OpPrereqError("No disk info specified and the export"
6728 " is missing the disk information",
6731 if (not self.op.nics and
6732 einfo.has_option(constants.INISECT_INS, "nic_count")):
6734 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6736 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6737 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6742 if (self.op.hypervisor is None and
6743 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6744 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6745 if einfo.has_section(constants.INISECT_HYP):
6746 # use the export parameters but do not override the ones
6747 # specified by the user
6748 for name, value in einfo.items(constants.INISECT_HYP):
6749 if name not in self.op.hvparams:
6750 self.op.hvparams[name] = value
6752 if einfo.has_section(constants.INISECT_BEP):
6753 # use the parameters, without overriding
6754 for name, value in einfo.items(constants.INISECT_BEP):
6755 if name not in self.op.beparams:
6756 self.op.beparams[name] = value
6758 # try to read the parameters old style, from the main section
6759 for name in constants.BES_PARAMETERS:
6760 if (name not in self.op.beparams and
6761 einfo.has_option(constants.INISECT_INS, name)):
6762 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6764 if einfo.has_section(constants.INISECT_OSP):
6765 # use the parameters, without overriding
6766 for name, value in einfo.items(constants.INISECT_OSP):
6767 if name not in self.op.osparams:
6768 self.op.osparams[name] = value
6770 def _RevertToDefaults(self, cluster):
6771 """Revert the instance parameters to the default values.
6775 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6776 for name in self.op.hvparams.keys():
6777 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6778 del self.op.hvparams[name]
6780 be_defs = cluster.SimpleFillBE({})
6781 for name in self.op.beparams.keys():
6782 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6783 del self.op.beparams[name]
6785 nic_defs = cluster.SimpleFillNIC({})
6786 for nic in self.op.nics:
6787 for name in constants.NICS_PARAMETERS:
6788 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6791 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6792 for name in self.op.osparams.keys():
6793 if name in os_defs and os_defs[name] == self.op.osparams[name]:
6794 del self.op.osparams[name]
6796 def CheckPrereq(self):
6797 """Check prerequisites.
6800 if self.op.mode == constants.INSTANCE_IMPORT:
6801 export_info = self._ReadExportInfo()
6802 self._ReadExportParams(export_info)
6804 _CheckDiskTemplate(self.op.disk_template)
6806 if (not self.cfg.GetVGName() and
6807 self.op.disk_template not in constants.DTS_NOT_LVM):
6808 raise errors.OpPrereqError("Cluster does not support lvm-based"
6809 " instances", errors.ECODE_STATE)
6811 if self.op.hypervisor is None:
6812 self.op.hypervisor = self.cfg.GetHypervisorType()
6814 cluster = self.cfg.GetClusterInfo()
6815 enabled_hvs = cluster.enabled_hypervisors
6816 if self.op.hypervisor not in enabled_hvs:
6817 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6818 " cluster (%s)" % (self.op.hypervisor,
6819 ",".join(enabled_hvs)),
6822 # check hypervisor parameter syntax (locally)
6823 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6824 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6826 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6827 hv_type.CheckParameterSyntax(filled_hvp)
6828 self.hv_full = filled_hvp
6829 # check that we don't specify global parameters on an instance
6830 _CheckGlobalHvParams(self.op.hvparams)
6832 # fill and remember the beparams dict
6833 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6834 self.be_full = cluster.SimpleFillBE(self.op.beparams)
6836 # build os parameters
6837 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6839 # now that hvp/bep are in final format, let's reset to defaults,
6841 if self.op.identify_defaults:
6842 self._RevertToDefaults(cluster)
6846 for idx, nic in enumerate(self.op.nics):
6847 nic_mode_req = nic.get("mode", None)
6848 nic_mode = nic_mode_req
6849 if nic_mode is None:
6850 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6852 # in routed mode, for the first nic, the default ip is 'auto'
6853 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6854 default_ip_mode = constants.VALUE_AUTO
6856 default_ip_mode = constants.VALUE_NONE
6858 # ip validity checks
6859 ip = nic.get("ip", default_ip_mode)
6860 if ip is None or ip.lower() == constants.VALUE_NONE:
6862 elif ip.lower() == constants.VALUE_AUTO:
6863 if not self.op.name_check:
6864 raise errors.OpPrereqError("IP address set to auto but name checks"
6865 " have been skipped. Aborting.",
6867 nic_ip = self.hostname1.ip
6869 if not utils.IsValidIP4(ip):
6870 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6871 " like a valid IP" % ip,
6875 # TODO: check the ip address for uniqueness
6876 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6877 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6880 # MAC address verification
6881 mac = nic.get("mac", constants.VALUE_AUTO)
6882 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6883 mac = utils.NormalizeAndValidateMac(mac)
6886 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6887 except errors.ReservationError:
6888 raise errors.OpPrereqError("MAC address %s already in use"
6889 " in cluster" % mac,
6890 errors.ECODE_NOTUNIQUE)
6892 # bridge verification
6893 bridge = nic.get("bridge", None)
6894 link = nic.get("link", None)
6896 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6897 " at the same time", errors.ECODE_INVAL)
6898 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6899 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6906 nicparams[constants.NIC_MODE] = nic_mode_req
6908 nicparams[constants.NIC_LINK] = link
6910 check_params = cluster.SimpleFillNIC(nicparams)
6911 objects.NIC.CheckParameterSyntax(check_params)
6912 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6914 # disk checks/pre-build
6916 for disk in self.op.disks:
6917 mode = disk.get("mode", constants.DISK_RDWR)
6918 if mode not in constants.DISK_ACCESS_SET:
6919 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6920 mode, errors.ECODE_INVAL)
6921 size = disk.get("size", None)
6923 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6926 except (TypeError, ValueError):
6927 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6929 new_disk = {"size": size, "mode": mode}
6931 new_disk["adopt"] = disk["adopt"]
6932 self.disks.append(new_disk)
6934 if self.op.mode == constants.INSTANCE_IMPORT:
6936 # Check that the new instance doesn't have fewer disks than the export
6937 instance_disks = len(self.disks)
6938 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6939 if instance_disks < export_disks:
6940 raise errors.OpPrereqError("Not enough disks to import."
6941 " (instance: %d, export: %d)" %
6942 (instance_disks, export_disks),
6946 for idx in range(export_disks):
6947 option = 'disk%d_dump' % idx
6948 if export_info.has_option(constants.INISECT_INS, option):
6949 # FIXME: are the old os-es, disk sizes, etc. useful?
6950 export_name = export_info.get(constants.INISECT_INS, option)
6951 image = utils.PathJoin(self.op.src_path, export_name)
6952 disk_images.append(image)
6954 disk_images.append(False)
6956 self.src_images = disk_images
6958 old_name = export_info.get(constants.INISECT_INS, 'name')
6960 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6961 except (TypeError, ValueError), err:
6962 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6963 " an integer: %s" % str(err),
6965 if self.op.instance_name == old_name:
6966 for idx, nic in enumerate(self.nics):
6967 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6968 nic_mac_ini = 'nic%d_mac' % idx
6969 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6971 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6973 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6974 if self.op.ip_check:
6975 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6976 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6977 (self.check_ip, self.op.instance_name),
6978 errors.ECODE_NOTUNIQUE)
6980 #### mac address generation
6981 # By generating the MAC address here, both the allocator and the hooks get
6982 # the real, final MAC address rather than the 'auto' or 'generate' value.
6983 # There is a race condition between the generation and the instance object
6984 # creation, which means that we know the mac is valid now, but we're not
6985 # sure it will be when we actually add the instance. If things go bad
6986 # adding the instance will abort because of a duplicate mac, and the
6987 # creation job will fail.
6988 for nic in self.nics:
6989 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6990 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6994 if self.op.iallocator is not None:
6995 self._RunAllocator()
6997 #### node related checks
6999 # check primary node
7000 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7001 assert self.pnode is not None, \
7002 "Cannot retrieve locked node %s" % self.op.pnode
7004 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7005 pnode.name, errors.ECODE_STATE)
7007 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7008 pnode.name, errors.ECODE_STATE)
7010 self.secondaries = []
7012 # mirror node verification
7013 if self.op.disk_template in constants.DTS_NET_MIRROR:
7014 if self.op.snode is None:
7015 raise errors.OpPrereqError("The networked disk templates need"
7016 " a mirror node", errors.ECODE_INVAL)
7017 if self.op.snode == pnode.name:
7018 raise errors.OpPrereqError("The secondary node cannot be the"
7019 " primary node.", errors.ECODE_INVAL)
7020 _CheckNodeOnline(self, self.op.snode)
7021 _CheckNodeNotDrained(self, self.op.snode)
7022 self.secondaries.append(self.op.snode)
7024 nodenames = [pnode.name] + self.secondaries
7026 req_size = _ComputeDiskSize(self.op.disk_template,
7029 # Check lv size requirements, if not adopting
7030 if req_size is not None and not self.adopt_disks:
7031 _CheckNodesFreeDisk(self, nodenames, req_size)
7033 if self.adopt_disks: # instead, we must check the adoption data
7034 all_lvs = set([i["adopt"] for i in self.disks])
7035 if len(all_lvs) != len(self.disks):
7036 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7038 for lv_name in all_lvs:
7040 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7041 except errors.ReservationError:
7042 raise errors.OpPrereqError("LV named %s used by another instance" %
7043 lv_name, errors.ECODE_NOTUNIQUE)
7045 node_lvs = self.rpc.call_lv_list([pnode.name],
7046 self.cfg.GetVGName())[pnode.name]
7047 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7048 node_lvs = node_lvs.payload
7049 delta = all_lvs.difference(node_lvs.keys())
7051 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7052 utils.CommaJoin(delta),
7054 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7056 raise errors.OpPrereqError("Online logical volumes found, cannot"
7057 " adopt: %s" % utils.CommaJoin(online_lvs),
7059 # update the size of disk based on what is found
7060 for dsk in self.disks:
7061 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7063 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7065 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7066 # check OS parameters (remotely)
7067 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7069 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7071 # memory check on primary node
7073 _CheckNodeFreeMemory(self, self.pnode.name,
7074 "creating instance %s" % self.op.instance_name,
7075 self.be_full[constants.BE_MEMORY],
7078 self.dry_run_result = list(nodenames)
7080 def Exec(self, feedback_fn):
7081 """Create and add the instance to the cluster.
7084 instance = self.op.instance_name
7085 pnode_name = self.pnode.name
7087 ht_kind = self.op.hypervisor
7088 if ht_kind in constants.HTS_REQ_PORT:
7089 network_port = self.cfg.AllocatePort()
7093 if constants.ENABLE_FILE_STORAGE:
7094 # this is needed because os.path.join does not accept None arguments
7095 if self.op.file_storage_dir is None:
7096 string_file_storage_dir = ""
7098 string_file_storage_dir = self.op.file_storage_dir
7100 # build the full file storage dir path
7101 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7102 string_file_storage_dir, instance)
7104 file_storage_dir = ""
7106 disks = _GenerateDiskTemplate(self,
7107 self.op.disk_template,
7108 instance, pnode_name,
7112 self.op.file_driver,
7115 iobj = objects.Instance(name=instance, os=self.op.os_type,
7116 primary_node=pnode_name,
7117 nics=self.nics, disks=disks,
7118 disk_template=self.op.disk_template,
7120 network_port=network_port,
7121 beparams=self.op.beparams,
7122 hvparams=self.op.hvparams,
7123 hypervisor=self.op.hypervisor,
7124 osparams=self.op.osparams,
7127 if self.adopt_disks:
7128 # rename LVs to the newly-generated names; we need to construct
7129 # 'fake' LV disks with the old data, plus the new unique_id
7130 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7132 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7133 rename_to.append(t_dsk.logical_id)
7134 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7135 self.cfg.SetDiskID(t_dsk, pnode_name)
7136 result = self.rpc.call_blockdev_rename(pnode_name,
7137 zip(tmp_disks, rename_to))
7138 result.Raise("Failed to rename adoped LVs")
7140 feedback_fn("* creating instance disks...")
7142 _CreateDisks(self, iobj)
7143 except errors.OpExecError:
7144 self.LogWarning("Device creation failed, reverting...")
7146 _RemoveDisks(self, iobj)
7148 self.cfg.ReleaseDRBDMinors(instance)
7151 feedback_fn("adding instance %s to cluster config" % instance)
7153 self.cfg.AddInstance(iobj, self.proc.GetECId())
7155 # Declare that we don't want to remove the instance lock anymore, as we've
7156 # added the instance to the config
7157 del self.remove_locks[locking.LEVEL_INSTANCE]
7158 # Unlock all the nodes
7159 if self.op.mode == constants.INSTANCE_IMPORT:
7160 nodes_keep = [self.op.src_node]
7161 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7162 if node != self.op.src_node]
7163 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7164 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7166 self.context.glm.release(locking.LEVEL_NODE)
7167 del self.acquired_locks[locking.LEVEL_NODE]
7169 if self.op.wait_for_sync:
7170 disk_abort = not _WaitForSync(self, iobj)
7171 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7172 # make sure the disks are not degraded (still sync-ing is ok)
7174 feedback_fn("* checking mirrors status")
7175 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7180 _RemoveDisks(self, iobj)
7181 self.cfg.RemoveInstance(iobj.name)
7182 # Make sure the instance lock gets removed
7183 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7184 raise errors.OpExecError("There are some degraded disks for"
7187 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7188 if self.op.mode == constants.INSTANCE_CREATE:
7189 if not self.op.no_install:
7190 feedback_fn("* running the instance OS create scripts...")
7191 # FIXME: pass debug option from opcode to backend
7192 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7193 self.op.debug_level)
7194 result.Raise("Could not add os for instance %s"
7195 " on node %s" % (instance, pnode_name))
7197 elif self.op.mode == constants.INSTANCE_IMPORT:
7198 feedback_fn("* running the instance OS import scripts...")
7202 for idx, image in enumerate(self.src_images):
7206 # FIXME: pass debug option from opcode to backend
7207 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7208 constants.IEIO_FILE, (image, ),
7209 constants.IEIO_SCRIPT,
7210 (iobj.disks[idx], idx),
7212 transfers.append(dt)
7215 masterd.instance.TransferInstanceData(self, feedback_fn,
7216 self.op.src_node, pnode_name,
7217 self.pnode.secondary_ip,
7219 if not compat.all(import_result):
7220 self.LogWarning("Some disks for instance %s on node %s were not"
7221 " imported successfully" % (instance, pnode_name))
7223 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7224 feedback_fn("* preparing remote import...")
7225 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7226 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7228 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7229 self.source_x509_ca,
7230 self._cds, timeouts)
7231 if not compat.all(disk_results):
7232 # TODO: Should the instance still be started, even if some disks
7233 # failed to import (valid for local imports, too)?
7234 self.LogWarning("Some disks for instance %s on node %s were not"
7235 " imported successfully" % (instance, pnode_name))
7237 # Run rename script on newly imported instance
7238 assert iobj.name == instance
7239 feedback_fn("Running rename script for %s" % instance)
7240 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7241 self.source_instance_name,
7242 self.op.debug_level)
7244 self.LogWarning("Failed to run rename script for %s on node"
7245 " %s: %s" % (instance, pnode_name, result.fail_msg))
7248 # also checked in the prereq part
7249 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7253 iobj.admin_up = True
7254 self.cfg.Update(iobj, feedback_fn)
7255 logging.info("Starting instance %s on node %s", instance, pnode_name)
7256 feedback_fn("* starting instance...")
7257 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7258 result.Raise("Could not start instance")
7260 return list(iobj.all_nodes)
7263 class LUConnectConsole(NoHooksLU):
7264 """Connect to an instance's console.
7266 This is somewhat special in that it returns the command line that
7267 you need to run on the master node in order to connect to the
7276 def ExpandNames(self):
7277 self._ExpandAndLockInstance()
7279 def CheckPrereq(self):
7280 """Check prerequisites.
7282 This checks that the instance is in the cluster.
7285 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7286 assert self.instance is not None, \
7287 "Cannot retrieve locked instance %s" % self.op.instance_name
7288 _CheckNodeOnline(self, self.instance.primary_node)
7290 def Exec(self, feedback_fn):
7291 """Connect to the console of an instance
7294 instance = self.instance
7295 node = instance.primary_node
7297 node_insts = self.rpc.call_instance_list([node],
7298 [instance.hypervisor])[node]
7299 node_insts.Raise("Can't get node information from %s" % node)
7301 if instance.name not in node_insts.payload:
7302 raise errors.OpExecError("Instance %s is not running." % instance.name)
7304 logging.debug("Connecting to console of %s on %s", instance.name, node)
7306 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7307 cluster = self.cfg.GetClusterInfo()
7308 # beparams and hvparams are passed separately, to avoid editing the
7309 # instance and then saving the defaults in the instance itself.
7310 hvparams = cluster.FillHV(instance)
7311 beparams = cluster.FillBE(instance)
7312 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7315 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7318 class LUReplaceDisks(LogicalUnit):
7319 """Replace the disks of an instance.
7322 HPATH = "mirrors-replace"
7323 HTYPE = constants.HTYPE_INSTANCE
7326 ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7327 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7328 ("remote_node", None, _TMaybeString),
7329 ("iallocator", None, _TMaybeString),
7330 ("early_release", False, _TBool),
7334 def CheckArguments(self):
7335 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7338 def ExpandNames(self):
7339 self._ExpandAndLockInstance()
7341 if self.op.iallocator is not None:
7342 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7344 elif self.op.remote_node is not None:
7345 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7346 self.op.remote_node = remote_node
7348 # Warning: do not remove the locking of the new secondary here
7349 # unless DRBD8.AddChildren is changed to work in parallel;
7350 # currently it doesn't since parallel invocations of
7351 # FindUnusedMinor will conflict
7352 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7353 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7356 self.needed_locks[locking.LEVEL_NODE] = []
7357 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7359 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7360 self.op.iallocator, self.op.remote_node,
7361 self.op.disks, False, self.op.early_release)
7363 self.tasklets = [self.replacer]
7365 def DeclareLocks(self, level):
7366 # If we're not already locking all nodes in the set we have to declare the
7367 # instance's primary/secondary nodes.
7368 if (level == locking.LEVEL_NODE and
7369 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7370 self._LockInstancesNodes()
7372 def BuildHooksEnv(self):
7375 This runs on the master, the primary and all the secondaries.
7378 instance = self.replacer.instance
7380 "MODE": self.op.mode,
7381 "NEW_SECONDARY": self.op.remote_node,
7382 "OLD_SECONDARY": instance.secondary_nodes[0],
7384 env.update(_BuildInstanceHookEnvByObject(self, instance))
7386 self.cfg.GetMasterNode(),
7387 instance.primary_node,
7389 if self.op.remote_node is not None:
7390 nl.append(self.op.remote_node)
7394 class TLReplaceDisks(Tasklet):
7395 """Replaces disks for an instance.
7397 Note: Locking is not within the scope of this class.
7400 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7401 disks, delay_iallocator, early_release):
7402 """Initializes this class.
7405 Tasklet.__init__(self, lu)
7408 self.instance_name = instance_name
7410 self.iallocator_name = iallocator_name
7411 self.remote_node = remote_node
7413 self.delay_iallocator = delay_iallocator
7414 self.early_release = early_release
7417 self.instance = None
7418 self.new_node = None
7419 self.target_node = None
7420 self.other_node = None
7421 self.remote_node_info = None
7422 self.node_secondary_ip = None
7425 def CheckArguments(mode, remote_node, iallocator):
7426 """Helper function for users of this class.
7429 # check for valid parameter combination
7430 if mode == constants.REPLACE_DISK_CHG:
7431 if remote_node is None and iallocator is None:
7432 raise errors.OpPrereqError("When changing the secondary either an"
7433 " iallocator script must be used or the"
7434 " new node given", errors.ECODE_INVAL)
7436 if remote_node is not None and iallocator is not None:
7437 raise errors.OpPrereqError("Give either the iallocator or the new"
7438 " secondary, not both", errors.ECODE_INVAL)
7440 elif remote_node is not None or iallocator is not None:
7441 # Not replacing the secondary
7442 raise errors.OpPrereqError("The iallocator and new node options can"
7443 " only be used when changing the"
7444 " secondary node", errors.ECODE_INVAL)
7447 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7448 """Compute a new secondary node using an IAllocator.
7451 ial = IAllocator(lu.cfg, lu.rpc,
7452 mode=constants.IALLOCATOR_MODE_RELOC,
7454 relocate_from=relocate_from)
7456 ial.Run(iallocator_name)
7459 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7460 " %s" % (iallocator_name, ial.info),
7463 if len(ial.result) != ial.required_nodes:
7464 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7465 " of nodes (%s), required %s" %
7467 len(ial.result), ial.required_nodes),
7470 remote_node_name = ial.result[0]
7472 lu.LogInfo("Selected new secondary for instance '%s': %s",
7473 instance_name, remote_node_name)
7475 return remote_node_name
7477 def _FindFaultyDisks(self, node_name):
7478 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7481 def CheckPrereq(self):
7482 """Check prerequisites.
7484 This checks that the instance is in the cluster.
7487 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7488 assert instance is not None, \
7489 "Cannot retrieve locked instance %s" % self.instance_name
7491 if instance.disk_template != constants.DT_DRBD8:
7492 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7493 " instances", errors.ECODE_INVAL)
7495 if len(instance.secondary_nodes) != 1:
7496 raise errors.OpPrereqError("The instance has a strange layout,"
7497 " expected one secondary but found %d" %
7498 len(instance.secondary_nodes),
7501 if not self.delay_iallocator:
7502 self._CheckPrereq2()
7504 def _CheckPrereq2(self):
7505 """Check prerequisites, second part.
7507 This function should always be part of CheckPrereq. It was separated and is
7508 now called from Exec because during node evacuation iallocator was only
7509 called with an unmodified cluster model, not taking planned changes into
7513 instance = self.instance
7514 secondary_node = instance.secondary_nodes[0]
7516 if self.iallocator_name is None:
7517 remote_node = self.remote_node
7519 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7520 instance.name, instance.secondary_nodes)
7522 if remote_node is not None:
7523 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7524 assert self.remote_node_info is not None, \
7525 "Cannot retrieve locked node %s" % remote_node
7527 self.remote_node_info = None
7529 if remote_node == self.instance.primary_node:
7530 raise errors.OpPrereqError("The specified node is the primary node of"
7531 " the instance.", errors.ECODE_INVAL)
7533 if remote_node == secondary_node:
7534 raise errors.OpPrereqError("The specified node is already the"
7535 " secondary node of the instance.",
7538 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7539 constants.REPLACE_DISK_CHG):
7540 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7543 if self.mode == constants.REPLACE_DISK_AUTO:
7544 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7545 faulty_secondary = self._FindFaultyDisks(secondary_node)
7547 if faulty_primary and faulty_secondary:
7548 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7549 " one node and can not be repaired"
7550 " automatically" % self.instance_name,
7554 self.disks = faulty_primary
7555 self.target_node = instance.primary_node
7556 self.other_node = secondary_node
7557 check_nodes = [self.target_node, self.other_node]
7558 elif faulty_secondary:
7559 self.disks = faulty_secondary
7560 self.target_node = secondary_node
7561 self.other_node = instance.primary_node
7562 check_nodes = [self.target_node, self.other_node]
7568 # Non-automatic modes
7569 if self.mode == constants.REPLACE_DISK_PRI:
7570 self.target_node = instance.primary_node
7571 self.other_node = secondary_node
7572 check_nodes = [self.target_node, self.other_node]
7574 elif self.mode == constants.REPLACE_DISK_SEC:
7575 self.target_node = secondary_node
7576 self.other_node = instance.primary_node
7577 check_nodes = [self.target_node, self.other_node]
7579 elif self.mode == constants.REPLACE_DISK_CHG:
7580 self.new_node = remote_node
7581 self.other_node = instance.primary_node
7582 self.target_node = secondary_node
7583 check_nodes = [self.new_node, self.other_node]
7585 _CheckNodeNotDrained(self.lu, remote_node)
7587 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7588 assert old_node_info is not None
7589 if old_node_info.offline and not self.early_release:
7590 # doesn't make sense to delay the release
7591 self.early_release = True
7592 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7593 " early-release mode", secondary_node)
7596 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7599 # If not specified all disks should be replaced
7601 self.disks = range(len(self.instance.disks))
7603 for node in check_nodes:
7604 _CheckNodeOnline(self.lu, node)
7606 # Check whether disks are valid
7607 for disk_idx in self.disks:
7608 instance.FindDisk(disk_idx)
7610 # Get secondary node IP addresses
7613 for node_name in [self.target_node, self.other_node, self.new_node]:
7614 if node_name is not None:
7615 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7617 self.node_secondary_ip = node_2nd_ip
7619 def Exec(self, feedback_fn):
7620 """Execute disk replacement.
7622 This dispatches the disk replacement to the appropriate handler.
7625 if self.delay_iallocator:
7626 self._CheckPrereq2()
7629 feedback_fn("No disks need replacement")
7632 feedback_fn("Replacing disk(s) %s for %s" %
7633 (utils.CommaJoin(self.disks), self.instance.name))
7635 activate_disks = (not self.instance.admin_up)
7637 # Activate the instance disks if we're replacing them on a down instance
7639 _StartInstanceDisks(self.lu, self.instance, True)
7642 # Should we replace the secondary node?
7643 if self.new_node is not None:
7644 fn = self._ExecDrbd8Secondary
7646 fn = self._ExecDrbd8DiskOnly
7648 return fn(feedback_fn)
7651 # Deactivate the instance disks if we're replacing them on a
7654 _SafeShutdownInstanceDisks(self.lu, self.instance)
7656 def _CheckVolumeGroup(self, nodes):
7657 self.lu.LogInfo("Checking volume groups")
7659 vgname = self.cfg.GetVGName()
7661 # Make sure volume group exists on all involved nodes
7662 results = self.rpc.call_vg_list(nodes)
7664 raise errors.OpExecError("Can't list volume groups on the nodes")
7668 res.Raise("Error checking node %s" % node)
7669 if vgname not in res.payload:
7670 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7673 def _CheckDisksExistence(self, nodes):
7674 # Check disk existence
7675 for idx, dev in enumerate(self.instance.disks):
7676 if idx not in self.disks:
7680 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7681 self.cfg.SetDiskID(dev, node)
7683 result = self.rpc.call_blockdev_find(node, dev)
7685 msg = result.fail_msg
7686 if msg or not result.payload:
7688 msg = "disk not found"
7689 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7692 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7693 for idx, dev in enumerate(self.instance.disks):
7694 if idx not in self.disks:
7697 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7700 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7702 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7703 " replace disks for instance %s" %
7704 (node_name, self.instance.name))
7706 def _CreateNewStorage(self, node_name):
7707 vgname = self.cfg.GetVGName()
7710 for idx, dev in enumerate(self.instance.disks):
7711 if idx not in self.disks:
7714 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7716 self.cfg.SetDiskID(dev, node_name)
7718 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7719 names = _GenerateUniqueNames(self.lu, lv_names)
7721 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7722 logical_id=(vgname, names[0]))
7723 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7724 logical_id=(vgname, names[1]))
7726 new_lvs = [lv_data, lv_meta]
7727 old_lvs = dev.children
7728 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7730 # we pass force_create=True to force the LVM creation
7731 for new_lv in new_lvs:
7732 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7733 _GetInstanceInfoText(self.instance), False)
7737 def _CheckDevices(self, node_name, iv_names):
7738 for name, (dev, _, _) in iv_names.iteritems():
7739 self.cfg.SetDiskID(dev, node_name)
7741 result = self.rpc.call_blockdev_find(node_name, dev)
7743 msg = result.fail_msg
7744 if msg or not result.payload:
7746 msg = "disk not found"
7747 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7750 if result.payload.is_degraded:
7751 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7753 def _RemoveOldStorage(self, node_name, iv_names):
7754 for name, (_, old_lvs, _) in iv_names.iteritems():
7755 self.lu.LogInfo("Remove logical volumes for %s" % name)
7758 self.cfg.SetDiskID(lv, node_name)
7760 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7762 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7763 hint="remove unused LVs manually")
7765 def _ReleaseNodeLock(self, node_name):
7766 """Releases the lock for a given node."""
7767 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7769 def _ExecDrbd8DiskOnly(self, feedback_fn):
7770 """Replace a disk on the primary or secondary for DRBD 8.
7772 The algorithm for replace is quite complicated:
7774 1. for each disk to be replaced:
7776 1. create new LVs on the target node with unique names
7777 1. detach old LVs from the drbd device
7778 1. rename old LVs to name_replaced.<time_t>
7779 1. rename new LVs to old LVs
7780 1. attach the new LVs (with the old names now) to the drbd device
7782 1. wait for sync across all devices
7784 1. for each modified disk:
7786 1. remove old LVs (which have the name name_replaced.<time_t>)
7788 Failures are not very well handled.
7793 # Step: check device activation
7794 self.lu.LogStep(1, steps_total, "Check device existence")
7795 self._CheckDisksExistence([self.other_node, self.target_node])
7796 self._CheckVolumeGroup([self.target_node, self.other_node])
7798 # Step: check other node consistency
7799 self.lu.LogStep(2, steps_total, "Check peer consistency")
7800 self._CheckDisksConsistency(self.other_node,
7801 self.other_node == self.instance.primary_node,
7804 # Step: create new storage
7805 self.lu.LogStep(3, steps_total, "Allocate new storage")
7806 iv_names = self._CreateNewStorage(self.target_node)
7808 # Step: for each lv, detach+rename*2+attach
7809 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7810 for dev, old_lvs, new_lvs in iv_names.itervalues():
7811 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7813 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7815 result.Raise("Can't detach drbd from local storage on node"
7816 " %s for device %s" % (self.target_node, dev.iv_name))
7818 #cfg.Update(instance)
7820 # ok, we created the new LVs, so now we know we have the needed
7821 # storage; as such, we proceed on the target node to rename
7822 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7823 # using the assumption that logical_id == physical_id (which in
7824 # turn is the unique_id on that node)
7826 # FIXME(iustin): use a better name for the replaced LVs
7827 temp_suffix = int(time.time())
7828 ren_fn = lambda d, suff: (d.physical_id[0],
7829 d.physical_id[1] + "_replaced-%s" % suff)
7831 # Build the rename list based on what LVs exist on the node
7832 rename_old_to_new = []
7833 for to_ren in old_lvs:
7834 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7835 if not result.fail_msg and result.payload:
7837 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7839 self.lu.LogInfo("Renaming the old LVs on the target node")
7840 result = self.rpc.call_blockdev_rename(self.target_node,
7842 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7844 # Now we rename the new LVs to the old LVs
7845 self.lu.LogInfo("Renaming the new LVs on the target node")
7846 rename_new_to_old = [(new, old.physical_id)
7847 for old, new in zip(old_lvs, new_lvs)]
7848 result = self.rpc.call_blockdev_rename(self.target_node,
7850 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7852 for old, new in zip(old_lvs, new_lvs):
7853 new.logical_id = old.logical_id
7854 self.cfg.SetDiskID(new, self.target_node)
7856 for disk in old_lvs:
7857 disk.logical_id = ren_fn(disk, temp_suffix)
7858 self.cfg.SetDiskID(disk, self.target_node)
7860 # Now that the new lvs have the old name, we can add them to the device
7861 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7862 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7864 msg = result.fail_msg
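# adding the new LVs as children failed; try to remove them again before aborting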
7866 for new_lv in new_lvs:
7867 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7870 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7871 hint=("cleanup manually the unused logical"
7873 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7875 dev.children = new_lvs
7877 self.cfg.Update(self.instance, feedback_fn)
7880 if self.early_release:
7881 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7883 self._RemoveOldStorage(self.target_node, iv_names)
7884 # WARNING: we release both node locks here, do not do other RPCs
7885 # than WaitForSync to the primary node
7886 self._ReleaseNodeLock([self.target_node, self.other_node])
7889 # This can fail as the old devices are degraded and _WaitForSync
7890 # does a combined result over all disks, so we don't check its return value
7891 self.lu.LogStep(cstep, steps_total, "Sync devices")
7893 _WaitForSync(self.lu, self.instance)
7895 # Check all devices manually
7896 self._CheckDevices(self.instance.primary_node, iv_names)
7898 # Step: remove old storage
7899 if not self.early_release:
7900 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7902 self._RemoveOldStorage(self.target_node, iv_names)
7904 def _ExecDrbd8Secondary(self, feedback_fn):
7905 """Replace the secondary node for DRBD 8.
7907 The algorithm for replace is quite complicated:
7908 - for all disks of the instance:
7909 - create new LVs on the new node with same names
7910 - shutdown the drbd device on the old secondary
7911 - disconnect the drbd network on the primary
7912 - create the drbd device on the new secondary
7913 - network attach the drbd on the primary, using an artifice:
7914 the drbd code for Attach() will connect to the network if it
7915 finds a device which is connected to the good local disks but not network enabled
7917 - wait for sync across all devices
7918 - remove all disks from the old secondary
7920 Failures are not very well handled.
7925 # Step: check device activation
7926 self.lu.LogStep(1, steps_total, "Check device existence")
7927 self._CheckDisksExistence([self.instance.primary_node])
7928 self._CheckVolumeGroup([self.instance.primary_node])
7930 # Step: check other node consistency
7931 self.lu.LogStep(2, steps_total, "Check peer consistency")
7932 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7934 # Step: create new storage
7935 self.lu.LogStep(3, steps_total, "Allocate new storage")
7936 for idx, dev in enumerate(self.instance.disks):
7937 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7938 (self.new_node, idx))
7939 # we pass force_create=True to force LVM creation
7940 for new_lv in dev.children:
7941 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7942 _GetInstanceInfoText(self.instance), False)
7944 # Step 4: drbd minors and drbd setup changes
7945 # after this, we must manually remove the drbd minors on both the
7946 # error and the success paths
7947 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7948 minors = self.cfg.AllocateDRBDMinor([self.new_node
7949 for dev in self.instance.disks],
7951 logging.debug("Allocated minors %r", minors)
7954 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7955 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7956 (self.new_node, idx))
7957 # create new devices on new_node; note that we create two IDs:
7958 # one without port, so the drbd will be activated without
7959 # networking information on the new node at this stage, and one
7960 # with network, for the latter activation in step 4
7961 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
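# figure out which of the two configured minors belongs to the primary node;
# the other end is the old secondary that is being replaced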
7962 if self.instance.primary_node == o_node1:
7965 assert self.instance.primary_node == o_node2, "Three-node instance?"
7968 new_alone_id = (self.instance.primary_node, self.new_node, None,
7969 p_minor, new_minor, o_secret)
7970 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7971 p_minor, new_minor, o_secret)
7973 iv_names[idx] = (dev, dev.children, new_net_id)
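# remember the device, its current children and the future (networked)
# logical_id; these are needed later to update the instance configuration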
7974 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7976 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7977 logical_id=new_alone_id,
7978 children=dev.children,
7981 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7982 _GetInstanceInfoText(self.instance), False)
7983 except errors.GenericError:
7984 self.cfg.ReleaseDRBDMinors(self.instance.name)
7987 # We have new devices, shutdown the drbd on the old secondary
7988 for idx, dev in enumerate(self.instance.disks):
7989 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7990 self.cfg.SetDiskID(dev, self.target_node)
7991 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7993 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7994 "node: %s" % (idx, msg),
7995 hint=("Please cleanup this device manually as"
7996 " soon as possible"))
7998 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7999 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8000 self.node_secondary_ip,
8001 self.instance.disks)\
8002 [self.instance.primary_node]
8004 msg = result.fail_msg
8006 # detaches didn't succeed (unlikely)
8007 self.cfg.ReleaseDRBDMinors(self.instance.name)
8008 raise errors.OpExecError("Can't detach the disks from the network on"
8009 " old node: %s" % (msg,))
8011 # if we managed to detach at least one, we update all the disks of
8012 # the instance to point to the new secondary
8013 self.lu.LogInfo("Updating instance configuration")
8014 for dev, _, new_logical_id in iv_names.itervalues():
8015 dev.logical_id = new_logical_id
8016 self.cfg.SetDiskID(dev, self.instance.primary_node)
8018 self.cfg.Update(self.instance, feedback_fn)
8020 # and now perform the drbd attach
8021 self.lu.LogInfo("Attaching primary drbds to new secondary"
8022 " (standalone => connected)")
8023 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8025 self.node_secondary_ip,
8026 self.instance.disks,
8029 for to_node, to_result in result.items():
8030 msg = to_result.fail_msg
8032 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8034 hint=("please do a gnt-instance info to see the"
8035 " status of disks"))
8037 if self.early_release:
8038 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8040 self._RemoveOldStorage(self.target_node, iv_names)
8041 # WARNING: we release all node locks here, do not do other RPCs
8042 # than WaitForSync to the primary node
8043 self._ReleaseNodeLock([self.instance.primary_node,
8048 # This can fail as the old devices are degraded and _WaitForSync
8049 # does a combined result over all disks, so we don't check its return value
8050 self.lu.LogStep(cstep, steps_total, "Sync devices")
8052 _WaitForSync(self.lu, self.instance)
8054 # Check all devices manually
8055 self._CheckDevices(self.instance.primary_node, iv_names)
8057 # Step: remove old storage
8058 if not self.early_release:
8059 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8060 self._RemoveOldStorage(self.target_node, iv_names)
8063 class LURepairNodeStorage(NoHooksLU):
8064 """Repairs the volume group on a node.
8069 ("storage_type", _NoDefault, _CheckStorageType),
8070 ("name", _NoDefault, _TNonEmptyString),
8071 ("ignore_consistency", False, _TBool),
8075 def CheckArguments(self):
8076 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8078 storage_type = self.op.storage_type
8080 if (constants.SO_FIX_CONSISTENCY not in
8081 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8082 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8083 " repaired" % storage_type,
8086 def ExpandNames(self):
8087 self.needed_locks = {
8088 locking.LEVEL_NODE: [self.op.node_name],
8091 def _CheckFaultyDisks(self, instance, node_name):
8092 """Ensure faulty disks abort the opcode or at least warn."""
8094 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8096 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8097 " node '%s'" % (instance.name, node_name),
8099 except errors.OpPrereqError, err:
8100 if self.op.ignore_consistency:
8101 self.proc.LogWarning(str(err.args[0]))
8105 def CheckPrereq(self):
8106 """Check prerequisites.
8109 # Check whether any instance on this node has faulty disks
8110 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8111 if not inst.admin_up:
8113 check_nodes = set(inst.all_nodes)
8114 check_nodes.discard(self.op.node_name)
8115 for inst_node_name in check_nodes:
8116 self._CheckFaultyDisks(inst, inst_node_name)
8118 def Exec(self, feedback_fn):
8119 feedback_fn("Repairing storage unit '%s' on %s ..." %
8120 (self.op.name, self.op.node_name))
8122 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8123 result = self.rpc.call_storage_execute(self.op.node_name,
8124 self.op.storage_type, st_args,
8126 constants.SO_FIX_CONSISTENCY)
8127 result.Raise("Failed to repair storage unit '%s' on %s" %
8128 (self.op.name, self.op.node_name))
8131 class LUNodeEvacuationStrategy(NoHooksLU):
8132 """Computes the node evacuation strategy.
8136 ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8137 ("remote_node", None, _TMaybeString),
8138 ("iallocator", None, _TMaybeString),
8142 def CheckArguments(self):
8143 if self.op.remote_node is not None and self.op.iallocator is not None:
8144 raise errors.OpPrereqError("Give either the iallocator or the new"
8145 " secondary, not both", errors.ECODE_INVAL)
8147 def ExpandNames(self):
8148 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8149 self.needed_locks = locks = {}
8150 if self.op.remote_node is None:
8151 locks[locking.LEVEL_NODE] = locking.ALL_SET
8153 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8154 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8156 def Exec(self, feedback_fn):
8157 if self.op.remote_node is not None:
8159 for node in self.op.nodes:
8160 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8163 if i.primary_node == self.op.remote_node:
8164 raise errors.OpPrereqError("Node %s is the primary node of"
8165 " instance %s, cannot use it as"
8167 (self.op.remote_node, i.name),
8169 result.append([i.name, self.op.remote_node])
8171 ial = IAllocator(self.cfg, self.rpc,
8172 mode=constants.IALLOCATOR_MODE_MEVAC,
8173 evac_nodes=self.op.nodes)
8174 ial.Run(self.op.iallocator, validate=True)
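# fail if the allocator did not produce a usable evacuation plan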
8176 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8182 class LUGrowDisk(LogicalUnit):
8183 """Grow a disk of an instance.
8187 HTYPE = constants.HTYPE_INSTANCE
8190 ("disk", _NoDefault, _TInt),
8191 ("amount", _NoDefault, _TInt),
8192 ("wait_for_sync", True, _TBool),
8196 def ExpandNames(self):
8197 self._ExpandAndLockInstance()
8198 self.needed_locks[locking.LEVEL_NODE] = []
8199 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8201 def DeclareLocks(self, level):
8202 if level == locking.LEVEL_NODE:
8203 self._LockInstancesNodes()
8205 def BuildHooksEnv(self):
8208 This runs on the master, the primary and all the secondaries.
8212 "DISK": self.op.disk,
8213 "AMOUNT": self.op.amount,
8215 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8216 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8219 def CheckPrereq(self):
8220 """Check prerequisites.
8222 This checks that the instance is in the cluster.
8225 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8226 assert instance is not None, \
8227 "Cannot retrieve locked instance %s" % self.op.instance_name
8228 nodenames = list(instance.all_nodes)
8229 for node in nodenames:
8230 _CheckNodeOnline(self, node)
8232 self.instance = instance
8234 if instance.disk_template not in constants.DTS_GROWABLE:
8235 raise errors.OpPrereqError("Instance's disk layout does not support"
8236 " growing.", errors.ECODE_INVAL)
8238 self.disk = instance.FindDisk(self.op.disk)
8240 if instance.disk_template != constants.DT_FILE:
8241 # TODO: check the free disk space for file, when that feature will be
8243 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8245 def Exec(self, feedback_fn):
8246 """Execute disk grow.
8249 instance = self.instance
8252 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8254 raise errors.OpExecError("Cannot activate block device to grow")
8256 for node in instance.all_nodes:
8257 self.cfg.SetDiskID(disk, node)
8258 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8259 result.Raise("Grow request failed to node %s" % node)
8261 # TODO: Rewrite code to work properly
8262 # DRBD goes into sync mode for a short amount of time after executing the
8263 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8264 # calling "resize" in sync mode fails. Sleeping for a short amount of
8265 # time is a work-around.
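# the physical devices have been grown; record the new size in the
# configuration and persist it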
8268 disk.RecordGrow(self.op.amount)
8269 self.cfg.Update(instance, feedback_fn)
8270 if self.op.wait_for_sync:
8271 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8273 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8274 " status.\nPlease check the instance.")
8275 if not instance.admin_up:
8276 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8277 elif not instance.admin_up:
8278 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8279 " not supposed to be running because no wait for"
8280 " sync mode was requested.")
8283 class LUQueryInstanceData(NoHooksLU):
8284 """Query runtime instance data.
8288 ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8289 ("static", False, _TBool),
8293 def ExpandNames(self):
8294 self.needed_locks = {}
8295 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
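# this is a read-only query, so all locks are acquired in shared mode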
8297 if self.op.instances:
8298 self.wanted_names = []
8299 for name in self.op.instances:
8300 full_name = _ExpandInstanceName(self.cfg, name)
8301 self.wanted_names.append(full_name)
8302 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8304 self.wanted_names = None
8305 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8307 self.needed_locks[locking.LEVEL_NODE] = []
8308 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8310 def DeclareLocks(self, level):
8311 if level == locking.LEVEL_NODE:
8312 self._LockInstancesNodes()
8314 def CheckPrereq(self):
8315 """Check prerequisites.
8317 This only checks the optional instance list against the existing names.
8320 if self.wanted_names is None:
8321 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8323 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8324 in self.wanted_names]
8326 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8327 """Returns the status of a block device
8330 if self.op.static or not node:
8333 self.cfg.SetDiskID(dev, node)
8335 result = self.rpc.call_blockdev_find(node, dev)
8339 result.Raise("Can't compute disk status for %s" % instance_name)
8341 status = result.payload
8345 return (status.dev_path, status.major, status.minor,
8346 status.sync_percent, status.estimated_time,
8347 status.is_degraded, status.ldisk_status)
8349 def _ComputeDiskStatus(self, instance, snode, dev):
8350 """Compute block device status.
8353 if dev.dev_type in constants.LDS_DRBD:
8354 # we change the snode then (otherwise we use the one passed in)
8355 if dev.logical_id[0] == instance.primary_node:
8356 snode = dev.logical_id[1]
8358 snode = dev.logical_id[0]
8360 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8362 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8365 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8366 for child in dev.children]
8371 "iv_name": dev.iv_name,
8372 "dev_type": dev.dev_type,
8373 "logical_id": dev.logical_id,
8374 "physical_id": dev.physical_id,
8375 "pstatus": dev_pstatus,
8376 "sstatus": dev_sstatus,
8377 "children": dev_children,
8384 def Exec(self, feedback_fn):
8385 """Gather and return data"""
8388 cluster = self.cfg.GetClusterInfo()
8390 for instance in self.wanted_instances:
8391 if not self.op.static:
8392 remote_info = self.rpc.call_instance_info(instance.primary_node,
8394 instance.hypervisor)
8395 remote_info.Raise("Error checking node %s" % instance.primary_node)
8396 remote_info = remote_info.payload
8397 if remote_info and "state" in remote_info:
8400 remote_state = "down"
8403 if instance.admin_up:
8406 config_state = "down"
8408 disks = [self._ComputeDiskStatus(instance, None, device)
8409 for device in instance.disks]
8412 "name": instance.name,
8413 "config_state": config_state,
8414 "run_state": remote_state,
8415 "pnode": instance.primary_node,
8416 "snodes": instance.secondary_nodes,
8418 # this happens to be the same format used for hooks
8419 "nics": _NICListToTuple(self, instance.nics),
8420 "disk_template": instance.disk_template,
8422 "hypervisor": instance.hypervisor,
8423 "network_port": instance.network_port,
8424 "hv_instance": instance.hvparams,
8425 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8426 "be_instance": instance.beparams,
8427 "be_actual": cluster.FillBE(instance),
8428 "os_instance": instance.osparams,
8429 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8430 "serial_no": instance.serial_no,
8431 "mtime": instance.mtime,
8432 "ctime": instance.ctime,
8433 "uuid": instance.uuid,
8436 result[instance.name] = idict
8441 class LUSetInstanceParams(LogicalUnit):
8442 """Modifies an instances's parameters.
8445 HPATH = "instance-modify"
8446 HTYPE = constants.HTYPE_INSTANCE
8449 ("nics", _EmptyList, _TList),
8450 ("disks", _EmptyList, _TList),
8451 ("beparams", _EmptyDict, _TDict),
8452 ("hvparams", _EmptyDict, _TDict),
8453 ("disk_template", None, _TMaybeString),
8454 ("remote_node", None, _TMaybeString),
8455 ("os_name", None, _TMaybeString),
8456 ("force_variant", False, _TBool),
8457 ("osparams", None, _TOr(_TDict, _TNone)),
8462 def CheckArguments(self):
8463 if not (self.op.nics or self.op.disks or self.op.disk_template or
8464 self.op.hvparams or self.op.beparams or self.op.os_name):
8465 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8467 if self.op.hvparams:
8468 _CheckGlobalHvParams(self.op.hvparams)
8472 for disk_op, disk_dict in self.op.disks:
8473 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8474 if disk_op == constants.DDM_REMOVE:
8477 elif disk_op == constants.DDM_ADD:
8480 if not isinstance(disk_op, int):
8481 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8482 if not isinstance(disk_dict, dict):
8483 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8484 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8486 if disk_op == constants.DDM_ADD:
8487 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8488 if mode not in constants.DISK_ACCESS_SET:
8489 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8491 size = disk_dict.get('size', None)
8493 raise errors.OpPrereqError("Required disk parameter size missing",
8497 except (TypeError, ValueError), err:
8498 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8499 str(err), errors.ECODE_INVAL)
8500 disk_dict['size'] = size
8502 # modification of disk
8503 if 'size' in disk_dict:
8504 raise errors.OpPrereqError("Disk size change not possible, use"
8505 " grow-disk", errors.ECODE_INVAL)
8507 if disk_addremove > 1:
8508 raise errors.OpPrereqError("Only one disk add or remove operation"
8509 " supported at a time", errors.ECODE_INVAL)
8511 if self.op.disks and self.op.disk_template is not None:
8512 raise errors.OpPrereqError("Disk template conversion and other disk"
8513 " changes not supported at the same time",
8516 if self.op.disk_template:
8517 _CheckDiskTemplate(self.op.disk_template)
8518 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8519 self.op.remote_node is None):
8520 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8521 " one requires specifying a secondary node",
8526 for nic_op, nic_dict in self.op.nics:
8527 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8528 if nic_op == constants.DDM_REMOVE:
8531 elif nic_op == constants.DDM_ADD:
8534 if not isinstance(nic_op, int):
8535 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8536 if not isinstance(nic_dict, dict):
8537 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8538 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8540 # nic_dict should be a dict
8541 nic_ip = nic_dict.get('ip', None)
8542 if nic_ip is not None:
8543 if nic_ip.lower() == constants.VALUE_NONE:
8544 nic_dict['ip'] = None
8546 if not utils.IsValidIP4(nic_ip):
8547 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8550 nic_bridge = nic_dict.get('bridge', None)
8551 nic_link = nic_dict.get('link', None)
8552 if nic_bridge and nic_link:
8553 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8554 " at the same time", errors.ECODE_INVAL)
8555 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8556 nic_dict['bridge'] = None
8557 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8558 nic_dict['link'] = None
8560 if nic_op == constants.DDM_ADD:
8561 nic_mac = nic_dict.get('mac', None)
8563 nic_dict['mac'] = constants.VALUE_AUTO
8565 if 'mac' in nic_dict:
8566 nic_mac = nic_dict['mac']
8567 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8568 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8570 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8571 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8572 " modifying an existing nic",
8575 if nic_addremove > 1:
8576 raise errors.OpPrereqError("Only one NIC add or remove operation"
8577 " supported at a time", errors.ECODE_INVAL)
8579 def ExpandNames(self):
8580 self._ExpandAndLockInstance()
8581 self.needed_locks[locking.LEVEL_NODE] = []
8582 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8584 def DeclareLocks(self, level):
8585 if level == locking.LEVEL_NODE:
8586 self._LockInstancesNodes()
8587 if self.op.disk_template and self.op.remote_node:
8588 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8589 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8591 def BuildHooksEnv(self):
8594 This runs on the master, primary and secondaries.
8598 if constants.BE_MEMORY in self.be_new:
8599 args['memory'] = self.be_new[constants.BE_MEMORY]
8600 if constants.BE_VCPUS in self.be_new:
8601 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8602 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8603 # information at all.
8606 nic_override = dict(self.op.nics)
8607 for idx, nic in enumerate(self.instance.nics):
8608 if idx in nic_override:
8609 this_nic_override = nic_override[idx]
8611 this_nic_override = {}
8612 if 'ip' in this_nic_override:
8613 ip = this_nic_override['ip']
8616 if 'mac' in this_nic_override:
8617 mac = this_nic_override['mac']
8620 if idx in self.nic_pnew:
8621 nicparams = self.nic_pnew[idx]
8623 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8624 mode = nicparams[constants.NIC_MODE]
8625 link = nicparams[constants.NIC_LINK]
8626 args['nics'].append((ip, mac, mode, link))
8627 if constants.DDM_ADD in nic_override:
8628 ip = nic_override[constants.DDM_ADD].get('ip', None)
8629 mac = nic_override[constants.DDM_ADD]['mac']
8630 nicparams = self.nic_pnew[constants.DDM_ADD]
8631 mode = nicparams[constants.NIC_MODE]
8632 link = nicparams[constants.NIC_LINK]
8633 args['nics'].append((ip, mac, mode, link))
8634 elif constants.DDM_REMOVE in nic_override:
8635 del args['nics'][-1]
8637 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8638 if self.op.disk_template:
8639 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8640 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8643 def CheckPrereq(self):
8644 """Check prerequisites.
8646 This only checks the instance list against the existing names.
8649 # checking the new params on the primary/secondary nodes
8651 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8652 cluster = self.cluster = self.cfg.GetClusterInfo()
8653 assert self.instance is not None, \
8654 "Cannot retrieve locked instance %s" % self.op.instance_name
8655 pnode = instance.primary_node
8656 nodelist = list(instance.all_nodes)
8659 if self.op.os_name and not self.op.force:
8660 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8661 self.op.force_variant)
8662 instance_os = self.op.os_name
8664 instance_os = instance.os
8666 if self.op.disk_template:
8667 if instance.disk_template == self.op.disk_template:
8668 raise errors.OpPrereqError("Instance already has disk template %s" %
8669 instance.disk_template, errors.ECODE_INVAL)
8671 if (instance.disk_template,
8672 self.op.disk_template) not in self._DISK_CONVERSIONS:
8673 raise errors.OpPrereqError("Unsupported disk template conversion from"
8674 " %s to %s" % (instance.disk_template,
8675 self.op.disk_template),
8677 _CheckInstanceDown(self, instance, "cannot change disk template")
8678 if self.op.disk_template in constants.DTS_NET_MIRROR:
8679 _CheckNodeOnline(self, self.op.remote_node)
8680 _CheckNodeNotDrained(self, self.op.remote_node)
8681 disks = [{"size": d.size} for d in instance.disks]
8682 required = _ComputeDiskSize(self.op.disk_template, disks)
8683 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8685 # hvparams processing
8686 if self.op.hvparams:
8687 hv_type = instance.hypervisor
8688 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8689 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8690 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8693 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8694 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8695 self.hv_new = hv_new # the new actual values
8696 self.hv_inst = i_hvdict # the new dict (without defaults)
8698 self.hv_new = self.hv_inst = {}
8700 # beparams processing
8701 if self.op.beparams:
8702 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8704 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8705 be_new = cluster.SimpleFillBE(i_bedict)
8706 self.be_new = be_new # the new actual values
8707 self.be_inst = i_bedict # the new dict (without defaults)
8709 self.be_new = self.be_inst = {}
8711 # osparams processing
8712 if self.op.osparams:
8713 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8714 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8715 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8716 self.os_inst = i_osdict # the new dict (without defaults)
8718 self.os_new = self.os_inst = {}
8722 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8723 mem_check_list = [pnode]
8724 if be_new[constants.BE_AUTO_BALANCE]:
8725 # either we changed auto_balance to yes or it was from before
8726 mem_check_list.extend(instance.secondary_nodes)
8727 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8728 instance.hypervisor)
8729 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8730 instance.hypervisor)
8731 pninfo = nodeinfo[pnode]
8732 msg = pninfo.fail_msg
8734 # Assume the primary node is unreachable and go ahead
8735 self.warn.append("Can't get info from primary node %s: %s" %
8737 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8738 self.warn.append("Node data from primary node %s doesn't contain"
8739 " free memory information" % pnode)
8740 elif instance_info.fail_msg:
8741 self.warn.append("Can't get instance runtime information: %s" %
8742 instance_info.fail_msg)
8744 if instance_info.payload:
8745 current_mem = int(instance_info.payload['memory'])
8747 # Assume instance not running
8748 # (there is a slight race condition here, but it's not very probable,
8749 # and we have no other way to check)
8751 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8752 pninfo.payload['memory_free'])
8754 raise errors.OpPrereqError("This change will prevent the instance"
8755 " from starting, due to %d MB of memory"
8756 " missing on its primary node" % miss_mem,
8759 if be_new[constants.BE_AUTO_BALANCE]:
8760 for node, nres in nodeinfo.items():
8761 if node not in instance.secondary_nodes:
8765 self.warn.append("Can't get info from secondary node %s: %s" %
8767 elif not isinstance(nres.payload.get('memory_free', None), int):
8768 self.warn.append("Secondary node %s didn't return free"
8769 " memory information" % node)
8770 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8771 self.warn.append("Not enough memory to failover instance to"
8772 " secondary node %s" % node)
8777 for nic_op, nic_dict in self.op.nics:
8778 if nic_op == constants.DDM_REMOVE:
8779 if not instance.nics:
8780 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8783 if nic_op != constants.DDM_ADD:
8785 if not instance.nics:
8786 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8787 " no NICs" % nic_op,
8789 if nic_op < 0 or nic_op >= len(instance.nics):
8790 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8792 (nic_op, len(instance.nics) - 1),
8794 old_nic_params = instance.nics[nic_op].nicparams
8795 old_nic_ip = instance.nics[nic_op].ip
8800 update_params_dict = dict([(key, nic_dict[key])
8801 for key in constants.NICS_PARAMETERS
8802 if key in nic_dict])
8804 if 'bridge' in nic_dict:
8805 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8807 new_nic_params = _GetUpdatedParams(old_nic_params,
8809 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8810 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8811 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8812 self.nic_pinst[nic_op] = new_nic_params
8813 self.nic_pnew[nic_op] = new_filled_nic_params
8814 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8816 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8817 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8818 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8820 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8822 self.warn.append(msg)
8824 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8825 if new_nic_mode == constants.NIC_MODE_ROUTED:
8826 if 'ip' in nic_dict:
8827 nic_ip = nic_dict['ip']
8831 raise errors.OpPrereqError('Cannot set the nic ip to None'
8832 ' on a routed nic', errors.ECODE_INVAL)
8833 if 'mac' in nic_dict:
8834 nic_mac = nic_dict['mac']
8836 raise errors.OpPrereqError('Cannot set the nic mac to None',
8838 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8839 # otherwise generate the mac
8840 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8842 # or validate/reserve the current one
8844 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8845 except errors.ReservationError:
8846 raise errors.OpPrereqError("MAC address %s already in use"
8847 " in cluster" % nic_mac,
8848 errors.ECODE_NOTUNIQUE)
8851 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8852 raise errors.OpPrereqError("Disk operations not supported for"
8853 " diskless instances",
8855 for disk_op, _ in self.op.disks:
8856 if disk_op == constants.DDM_REMOVE:
8857 if len(instance.disks) == 1:
8858 raise errors.OpPrereqError("Cannot remove the last disk of"
8859 " an instance", errors.ECODE_INVAL)
8860 _CheckInstanceDown(self, instance, "cannot remove disks")
8862 if (disk_op == constants.DDM_ADD and
8863 len(instance.disks) >= constants.MAX_DISKS):
8864 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8865 " add more" % constants.MAX_DISKS,
8867 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8869 if disk_op < 0 or disk_op >= len(instance.disks):
8870 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8872 (disk_op, len(instance.disks) - 1),
8877 def _ConvertPlainToDrbd(self, feedback_fn):
8878 """Converts an instance from plain to drbd.
8881 feedback_fn("Converting template to drbd")
8882 instance = self.instance
8883 pnode = instance.primary_node
8884 snode = self.op.remote_node
8886 # create a fake disk info for _GenerateDiskTemplate
8887 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8888 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8889 instance.name, pnode, [snode],
8890 disk_info, None, None, 0)
8891 info = _GetInstanceInfoText(instance)
8892 feedback_fn("Creating aditional volumes...")
8893 # first, create the missing data and meta devices
8894 for disk in new_disks:
8895 # unfortunately this is... not too nice
8896 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8898 for child in disk.children:
8899 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8900 # at this stage, all new LVs have been created, we can rename the
8902 feedback_fn("Renaming original volumes...")
8903 rename_list = [(o, n.children[0].logical_id)
8904 for (o, n) in zip(instance.disks, new_disks)]
8905 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8906 result.Raise("Failed to rename original LVs")
8908 feedback_fn("Initializing DRBD devices...")
8909 # all child devices are in place, we can now create the DRBD devices
8910 for disk in new_disks:
8911 for node in [pnode, snode]:
8912 f_create = node == pnode
8913 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8915 # at this point, the instance has been modified
8916 instance.disk_template = constants.DT_DRBD8
8917 instance.disks = new_disks
8918 self.cfg.Update(instance, feedback_fn)
8920 # disks are created, waiting for sync
8921 disk_abort = not _WaitForSync(self, instance)
8923 raise errors.OpExecError("There are some degraded disks for"
8924 " this instance, please cleanup manually")
8926 def _ConvertDrbdToPlain(self, feedback_fn):
8927 """Converts an instance from drbd to plain.
8930 instance = self.instance
8931 assert len(instance.secondary_nodes) == 1
8932 pnode = instance.primary_node
8933 snode = instance.secondary_nodes[0]
8934 feedback_fn("Converting template to plain")
8936 old_disks = instance.disks
8937 new_disks = [d.children[0] for d in old_disks]
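# for DRBD8 disks the first child is the data LV, which becomes the plain disk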
8939 # copy over size and mode
8940 for parent, child in zip(old_disks, new_disks):
8941 child.size = parent.size
8942 child.mode = parent.mode
8944 # update instance structure
8945 instance.disks = new_disks
8946 instance.disk_template = constants.DT_PLAIN
8947 self.cfg.Update(instance, feedback_fn)
8949 feedback_fn("Removing volumes on the secondary node...")
8950 for disk in old_disks:
8951 self.cfg.SetDiskID(disk, snode)
8952 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8954 self.LogWarning("Could not remove block device %s on node %s,"
8955 " continuing anyway: %s", disk.iv_name, snode, msg)
8957 feedback_fn("Removing unneeded volumes on the primary node...")
8958 for idx, disk in enumerate(old_disks):
8959 meta = disk.children[1]
8960 self.cfg.SetDiskID(meta, pnode)
8961 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8963 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8964 " continuing anyway: %s", idx, pnode, msg)
8967 def Exec(self, feedback_fn):
8968 """Modifies an instance.
8970 All parameters take effect only at the next restart of the instance.
8973 # Process here the warnings from CheckPrereq, as we don't have a
8974 # feedback_fn there.
8975 for warn in self.warn:
8976 feedback_fn("WARNING: %s" % warn)
8979 instance = self.instance
8981 for disk_op, disk_dict in self.op.disks:
8982 if disk_op == constants.DDM_REMOVE:
8983 # remove the last disk
8984 device = instance.disks.pop()
8985 device_idx = len(instance.disks)
8986 for node, disk in device.ComputeNodeTree(instance.primary_node):
8987 self.cfg.SetDiskID(disk, node)
8988 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8990 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8991 " continuing anyway", device_idx, node, msg)
8992 result.append(("disk/%d" % device_idx, "remove"))
8993 elif disk_op == constants.DDM_ADD:
8995 if instance.disk_template == constants.DT_FILE:
8996 file_driver, file_path = instance.disks[0].logical_id
8997 file_path = os.path.dirname(file_path)
8999 file_driver = file_path = None
9000 disk_idx_base = len(instance.disks)
9001 new_disk = _GenerateDiskTemplate(self,
9002 instance.disk_template,
9003 instance.name, instance.primary_node,
9004 instance.secondary_nodes,
9009 instance.disks.append(new_disk)
9010 info = _GetInstanceInfoText(instance)
9012 logging.info("Creating volume %s for instance %s",
9013 new_disk.iv_name, instance.name)
9014 # Note: this needs to be kept in sync with _CreateDisks
9016 for node in instance.all_nodes:
9017 f_create = node == instance.primary_node
9019 _CreateBlockDev(self, node, instance, new_disk,
9020 f_create, info, f_create)
9021 except errors.OpExecError, err:
9022 self.LogWarning("Failed to create volume %s (%s) on"
9024 new_disk.iv_name, new_disk, node, err)
9025 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9026 (new_disk.size, new_disk.mode)))
9028 # change a given disk
9029 instance.disks[disk_op].mode = disk_dict['mode']
9030 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9032 if self.op.disk_template:
9033 r_shut = _ShutdownInstanceDisks(self, instance)
9035 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9036 " proceed with disk template conversion")
9037 mode = (instance.disk_template, self.op.disk_template)
9039 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9041 self.cfg.ReleaseDRBDMinors(instance.name)
9043 result.append(("disk_template", self.op.disk_template))
9046 for nic_op, nic_dict in self.op.nics:
9047 if nic_op == constants.DDM_REMOVE:
9048 # remove the last nic
9049 del instance.nics[-1]
9050 result.append(("nic.%d" % len(instance.nics), "remove"))
9051 elif nic_op == constants.DDM_ADD:
9052 # mac and bridge should be set by now
9053 mac = nic_dict['mac']
9054 ip = nic_dict.get('ip', None)
9055 nicparams = self.nic_pinst[constants.DDM_ADD]
9056 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9057 instance.nics.append(new_nic)
9058 result.append(("nic.%d" % (len(instance.nics) - 1),
9059 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9060 (new_nic.mac, new_nic.ip,
9061 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9062 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9065 for key in 'mac', 'ip':
9067 setattr(instance.nics[nic_op], key, nic_dict[key])
9068 if nic_op in self.nic_pinst:
9069 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9070 for key, val in nic_dict.iteritems():
9071 result.append(("nic.%s/%d" % (key, nic_op), val))
9074 if self.op.hvparams:
9075 instance.hvparams = self.hv_inst
9076 for key, val in self.op.hvparams.iteritems():
9077 result.append(("hv/%s" % key, val))
9080 if self.op.beparams:
9081 instance.beparams = self.be_inst
9082 for key, val in self.op.beparams.iteritems():
9083 result.append(("be/%s" % key, val))
9087 instance.os = self.op.os_name
9090 if self.op.osparams:
9091 instance.osparams = self.os_inst
9092 for key, val in self.op.osparams.iteritems():
9093 result.append(("os/%s" % key, val))
9095 self.cfg.Update(instance, feedback_fn)
9099 _DISK_CONVERSIONS = {
9100 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9101 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9105 class LUQueryExports(NoHooksLU):
9106 """Query the exports list
9110 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9111 ("use_locking", False, _TBool),
9115 def ExpandNames(self):
9116 self.needed_locks = {}
9117 self.share_locks[locking.LEVEL_NODE] = 1
9118 if not self.op.nodes:
9119 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9121 self.needed_locks[locking.LEVEL_NODE] = \
9122 _GetWantedNodes(self, self.op.nodes)
9124 def Exec(self, feedback_fn):
9125 """Compute the list of all the exported system images.
9128 @return: a dictionary with the structure node->(export-list)
9129 where export-list is a list of the instances exported on
9133 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9134 rpcresult = self.rpc.call_export_list(self.nodes)
9136 for node in rpcresult:
9137 if rpcresult[node].fail_msg:
9138 result[node] = False
9140 result[node] = rpcresult[node].payload
9145 class LUPrepareExport(NoHooksLU):
9146 """Prepares an instance for an export and returns useful information.
9151 ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9155 def ExpandNames(self):
9156 self._ExpandAndLockInstance()
9158 def CheckPrereq(self):
9159 """Check prerequisites.
9162 instance_name = self.op.instance_name
9164 self.instance = self.cfg.GetInstanceInfo(instance_name)
9165 assert self.instance is not None, \
9166 "Cannot retrieve locked instance %s" % self.op.instance_name
9167 _CheckNodeOnline(self, self.instance.primary_node)
9169 self._cds = _GetClusterDomainSecret()
9171 def Exec(self, feedback_fn):
9172 """Prepares an instance for an export.
9175 instance = self.instance
9177 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9178 salt = utils.GenerateSecret(8)
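# random salt used when signing the X509 key name and the CA with the
# cluster domain secret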
9180 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9181 result = self.rpc.call_x509_cert_create(instance.primary_node,
9182 constants.RIE_CERT_VALIDITY)
9183 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9185 (name, cert_pem) = result.payload
9187 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9191 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9192 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9194 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9200 class LUExportInstance(LogicalUnit):
9201 """Export an instance to an image in the cluster.
9204 HPATH = "instance-export"
9205 HTYPE = constants.HTYPE_INSTANCE
9208 ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9209 ("shutdown", True, _TBool),
9211 ("remove_instance", False, _TBool),
9212 ("ignore_remove_failures", False, _TBool),
9213 ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9214 ("x509_key_name", None, _TOr(_TList, _TNone)),
9215 ("destination_x509_ca", None, _TMaybeString),
9219 def CheckArguments(self):
9220 """Check the arguments.
9223 self.x509_key_name = self.op.x509_key_name
9224 self.dest_x509_ca_pem = self.op.destination_x509_ca
9226 if self.op.remove_instance and not self.op.shutdown:
9227 raise errors.OpPrereqError("Can not remove instance without shutting it"
9230 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9231 if not self.x509_key_name:
9232 raise errors.OpPrereqError("Missing X509 key name for encryption",
9235 if not self.dest_x509_ca_pem:
9236 raise errors.OpPrereqError("Missing destination X509 CA",
9239 def ExpandNames(self):
9240 self._ExpandAndLockInstance()
9242 # Lock all nodes for local exports
9243 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9244 # FIXME: lock only instance primary and destination node
9246 # Sad but true, for now we have to lock all nodes, as we don't know where
9247 # the previous export might be, and in this LU we search for it and
9248 # remove it from its current node. In the future we could fix this by:
9249 # - making a tasklet to search (share-lock all), then create the
9250 # new one, then one to remove, after
9251 # - removing the removal operation altogether
9252 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9254 def DeclareLocks(self, level):
9255 """Last minute lock declaration."""
9256 # All nodes are locked anyway, so nothing to do here.
9258 def BuildHooksEnv(self):
9261 This will run on the master, primary node and target node.
9265 "EXPORT_MODE": self.op.mode,
9266 "EXPORT_NODE": self.op.target_node,
9267 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9268 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9269 # TODO: Generic function for boolean env variables
9270 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9273 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9275 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9277 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9278 nl.append(self.op.target_node)
9282 def CheckPrereq(self):
9283 """Check prerequisites.
9285 This checks that the instance and node names are valid.
9288 instance_name = self.op.instance_name
9290 self.instance = self.cfg.GetInstanceInfo(instance_name)
9291 assert self.instance is not None, \
9292 "Cannot retrieve locked instance %s" % self.op.instance_name
9293 _CheckNodeOnline(self, self.instance.primary_node)
9295 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9296 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9297 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9298 assert self.dst_node is not None
9300 _CheckNodeOnline(self, self.dst_node.name)
9301 _CheckNodeNotDrained(self, self.dst_node.name)
9304 self.dest_disk_info = None
9305 self.dest_x509_ca = None
9307 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9308 self.dst_node = None
9310 if len(self.op.target_node) != len(self.instance.disks):
9311 raise errors.OpPrereqError(("Received destination information for %s"
9312 " disks, but instance %s has %s disks") %
9313 (len(self.op.target_node), instance_name,
9314 len(self.instance.disks)),
9317 cds = _GetClusterDomainSecret()
9319 # Check X509 key name
9321 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9322 except (TypeError, ValueError), err:
9323 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9325 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9326 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9329 # Load and verify CA
9331 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9332 except OpenSSL.crypto.Error, err:
9333 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9334 (err, ), errors.ECODE_INVAL)
9336 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9337 if errcode is not None:
9338 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9339 (msg, ), errors.ECODE_INVAL)
9341 self.dest_x509_ca = cert
9343 # Verify target information
9345 for idx, disk_data in enumerate(self.op.target_node):
9347 (host, port, magic) = \
9348 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9349 except errors.GenericError, err:
9350 raise errors.OpPrereqError("Target info for disk %s: %s" %
9351 (idx, err), errors.ECODE_INVAL)
9353 disk_info.append((host, port, magic))
9355 assert len(disk_info) == len(self.op.target_node)
9356 self.dest_disk_info = disk_info
9359 raise errors.ProgrammerError("Unhandled export mode %r" %
9362 # instance disk type verification
9363 # TODO: Implement export support for file-based disks
9364 for disk in self.instance.disks:
9365 if disk.dev_type == constants.LD_FILE:
9366 raise errors.OpPrereqError("Export not supported for instances with"
9367 " file-based disks", errors.ECODE_INVAL)
9369 def _CleanupExports(self, feedback_fn):
9370 """Removes exports of current instance from all other nodes.
9372 If an instance in a cluster with nodes A..D was exported to node C, its
9373 exports will be removed from the nodes A, B and D.
9376 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9378 nodelist = self.cfg.GetNodeList()
9379 nodelist.remove(self.dst_node.name)
9381 # on one-node clusters nodelist will be empty after the removal
9382 # if we proceed the backup would be removed because OpQueryExports
9383 # substitutes an empty list with the full cluster node list.
9384 iname = self.instance.name
9386 feedback_fn("Removing old exports for instance %s" % iname)
9387 exportlist = self.rpc.call_export_list(nodelist)
9388 for node in exportlist:
9389 if exportlist[node].fail_msg:
9391 if iname in exportlist[node].payload:
9392 msg = self.rpc.call_export_remove(node, iname).fail_msg
9394 self.LogWarning("Could not remove older export for instance %s"
9395 " on node %s: %s", iname, node, msg)
9397 def Exec(self, feedback_fn):
9398 """Export an instance to an image in the cluster.
9401 assert self.op.mode in constants.EXPORT_MODES
9403 instance = self.instance
9404 src_node = instance.primary_node
9406 if self.op.shutdown:
9407 # shutdown the instance, but not the disks
9408 feedback_fn("Shutting down instance %s" % instance.name)
9409 result = self.rpc.call_instance_shutdown(src_node, instance,
9410 self.op.shutdown_timeout)
9411 # TODO: Maybe ignore failures if ignore_remove_failures is set
9412 result.Raise("Could not shutdown instance %s on"
9413 " node %s" % (instance.name, src_node))
9415 # set the disks ID correctly since call_instance_start needs the
9416 # correct drbd minor to create the symlinks
9417 for disk in instance.disks:
9418 self.cfg.SetDiskID(disk, src_node)
9420 activate_disks = (not instance.admin_up)
9423 # Activate the instance disks if we're exporting a stopped instance
9424 feedback_fn("Activating disks for %s" % instance.name)
9425 _StartInstanceDisks(self, instance, None)
9428 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9431 helper.CreateSnapshots()
9433 if (self.op.shutdown and instance.admin_up and
9434 not self.op.remove_instance):
9435 assert not activate_disks
9436 feedback_fn("Starting instance %s" % instance.name)
9437 result = self.rpc.call_instance_start(src_node, instance, None, None)
9438 msg = result.fail_msg
9440 feedback_fn("Failed to start instance: %s" % msg)
9441 _ShutdownInstanceDisks(self, instance)
9442 raise errors.OpExecError("Could not start instance: %s" % msg)
9444 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9445 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9446 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9447 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9448 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9450 (key_name, _, _) = self.x509_key_name
9453 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9456 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9457 key_name, dest_ca_pem,
9462 # Check for backwards compatibility
9463 assert len(dresults) == len(instance.disks)
9464 assert compat.all(isinstance(i, bool) for i in dresults), \
9465 "Not all results are boolean: %r" % dresults
9469 feedback_fn("Deactivating disks for %s" % instance.name)
9470 _ShutdownInstanceDisks(self, instance)
9472 # Remove instance if requested
9473 if self.op.remove_instance:
      if not (compat.all(dresults) and fin_resu):
        feedback_fn("Not removing instance %s as parts of the export failed" %
                    instance.name)
      else:
        feedback_fn("Removing instance %s" % instance.name)
        _RemoveInstance(self, feedback_fn, instance,
                        self.op.ignore_remove_failures)
9482 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9483 self._CleanupExports(feedback_fn)
9485 return fin_resu, dresults
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_PARAMS = [
    ("instance_name", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False
9497 def ExpandNames(self):
9498 self.needed_locks = {}
9499 # We need all nodes to be locked in order for RemoveExport to work, but we
9500 # don't need to lock the instance itself, as nothing will happen to it (and
9501 # we can remove exports also for a removed instance)
9502 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9504 def Exec(self, feedback_fn):
9505 """Remove any export.
9508 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9509 # If the instance was not found we'll try with the name that was passed in.
9510 # This will only work if it was an FQDN, though.
9512 if not instance_name:
9514 instance_name = self.op.instance_name
9516 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9517 exportlist = self.rpc.call_export_list(locked_nodes)
9519 for node in exportlist:
9520 msg = exportlist[node].fail_msg
9522 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9524 if instance_name in exportlist[node].payload:
9526 result = self.rpc.call_export_remove(node, instance_name)
9527 msg = result.fail_msg
9529 logging.error("Could not remove export for instance %s"
9530 " on node %s: %s", instance_name, node, msg)
9532 if fqdn_warn and not found:
9533 feedback_fn("Export not found. If trying to remove an export belonging"
9534 " to a deleted instance please use its Fully Qualified"
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
9545 def ExpandNames(self):
9546 self.needed_locks = {}
9547 if self.op.kind == constants.TAG_NODE:
9548 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9549 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9550 elif self.op.kind == constants.TAG_INSTANCE:
9551 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9552 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
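    # Cluster tags need no additional locking, hence there is no branch for
    # constants.TAG_CLUSTER here; CheckPrereq below still handles that kind.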
9554 def CheckPrereq(self):
9555 """Check prerequisites.
9558 if self.op.kind == constants.TAG_CLUSTER:
9559 self.target = self.cfg.GetClusterInfo()
9560 elif self.op.kind == constants.TAG_NODE:
9561 self.target = self.cfg.GetNodeInfo(self.op.name)
9562 elif self.op.kind == constants.TAG_INSTANCE:
9563 self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False
9579 def Exec(self, feedback_fn):
9580 """Returns the tag list.
9583 return list(self.target.GetTags())
class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_PARAMS = [
    ("pattern", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False
9595 def ExpandNames(self):
9596 self.needed_locks = {}
9598 def CheckPrereq(self):
9599 """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
9607 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9608 (self.op.pattern, err), errors.ECODE_INVAL)
9610 def Exec(self, feedback_fn):
9611 """Returns the tag list.
9615 tgts = [("/cluster", cfg.GetClusterInfo())]
9616 ilist = cfg.GetAllInstancesInfo().values()
9617 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9618 nlist = cfg.GetAllNodesInfo().values()
9619 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9621 for path, target in tgts:
9622 for tag in target.GetTags():
9623 if self.re.search(tag):
9624 results.append((path, tag))
class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False
9639 def CheckPrereq(self):
9640 """Check prerequisites.
9642 This checks the type and length of the tag name and value.
9645 TagsLU.CheckPrereq(self)
9646 for tag in self.op.tags:
9647 objects.TaggableObject.ValidateTag(tag)
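    # validating the tags here lets CheckPrereq reject malformed input before
    # Exec touches the target object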
  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9658 self.cfg.Update(self.target, feedback_fn)
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False
9672 def CheckPrereq(self):
9673 """Check prerequisites.
9675 This checks that we have the given tag.
9678 TagsLU.CheckPrereq(self)
9679 for tag in self.op.tags:
9680 objects.TaggableObject.ValidateTag(tag)
9681 del_tags = frozenset(self.op.tags)
9682 cur_tags = self.target.GetTags()
9683 if not del_tags <= cur_tags:
9684 diff_tags = del_tags - cur_tags
9685 diff_names = ["'%s'" % tag for tag in diff_tags]
9687 raise errors.OpPrereqError("Tag(s) %s not found" %
9688 (",".join(diff_names)), errors.ECODE_NOENT)
9690 def Exec(self, feedback_fn):
9691 """Remove the tag from the object.
9694 for tag in self.op.tags:
9695 self.target.RemoveTag(tag)
9696 self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_PARAMS = [
    ("duration", _NoDefault, _TFloat),
    ("on_master", True, _TBool),
    ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("repeat", 0, _TPositiveInt),
    ]
  REQ_BGL = False
9714 def ExpandNames(self):
9715 """Expand names and set required locks.
9717 This expands the node list, if any.
9720 self.needed_locks = {}
9721 if self.op.on_nodes:
9722 # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9726 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9728 def _TestDelay(self):
9729 """Do the actual sleep.
9732 if self.op.on_master:
9733 if not utils.TestDelay(self.op.duration):
9734 raise errors.OpExecError("Error during master delay test")
9735 if self.op.on_nodes:
9736 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9737 for node, node_result in result.items():
9738 node_result.Raise("Failure during rpc call to node %s" % node)
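        # a single failing node aborts the whole delay test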
9740 def Exec(self, feedback_fn):
9741 """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
9753 class IAllocator(object):
9754 """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
9766 # pylint: disable-msg=R0902
9767 # lots of instance attributes
9769 "name", "mem_size", "disks", "disk_template",
9770 "os", "tags", "nics", "vcpus", "hypervisor",
9773 "name", "relocate_from",
  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
9794 # init result fields
9795 self.success = self.info = self.result = None
9796 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9797 keyset = self._ALLO_KEYS
9798 fn = self._AddNewInstance
9799 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9800 keyset = self._RELO_KEYS
9801 fn = self._AddRelocateInstance
9802 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9803 keyset = self._EVAC_KEYS
9804 fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)

    self._BuildInputData(fn)
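    # at this point in_data and in_text are fully populated and the object is
    # ready for Run()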
9820 def _ComputeClusterData(self):
9821 """Compute the generic allocator input data.
9823 This is the data that is independent of the actual operation.
9827 cluster_info = cfg.GetClusterInfo()
9830 "version": constants.IALLOCATOR_VERSION,
9831 "cluster_name": cfg.GetClusterName(),
9832 "cluster_tags": list(cluster_info.GetTags()),
9833 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9834 # we don't have job IDs
9836 iinfo = cfg.GetAllInstancesInfo().values()
9837 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
9855 for nname, nresult in node_data.items():
9856 # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
9868 nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload
9873 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9874 'vg_size', 'vg_free', 'cpu_total']:
9875 if attr not in remote_info:
9876 raise errors.OpExecError("Node '%s' didn't return attribute"
9877 " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr

    data["nodes"] = node_results
    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                    }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request
9977 def _AddRelocateInstance(self):
9978 """Add relocate instance data to allocator structure.
9980 This in combination with _IAllocatorGetClusterData will create the
9981 correct structure needed as input for the allocator.
9983 The checks for the completeness of the opcode must have already been
9987 instance = self.cfg.GetInstanceInfo(self.name)
9988 if instance is None:
9989 raise errors.ProgrammerError("Unknown instance '%s' passed to"
9990 " IAllocator" % self.name)
9992 if instance.disk_template not in constants.DTS_NET_MIRROR:
9993 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9996 if len(instance.secondary_nodes) != 1:
9997 raise errors.OpPrereqError("Instance has not exactly one secondary node",
10000 self.required_nodes = 1
10001 disk_sizes = [{'size': disk.size} for disk in instance.disks]
10002 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10006 "disk_space_total": disk_space,
10007 "required_nodes": self.required_nodes,
10008 "relocate_from": self.relocate_from,
  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {"evac_nodes": self.evac_nodes}
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
10033 def Run(self, name, validate=True, call_fn=None):
10034 """Run an instance allocator and return the results.
10037 if call_fn is None:
10038 call_fn = self.rpc.call_iallocator_runner
10040 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10041 result.Raise("Failure while running the iallocator script")
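    # on success, the payload is the raw text output of the iallocator script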
    self.out_text = result.payload
    if validate:
      self._ValidateResult()
10047 def _ValidateResult(self):
10048 """Process the allocator results.
10050 This will process and if successful save the result in
10051 self.out_data and the other parameters.
10055 rdict = serializer.Load(self.out_text)
10056 except Exception, err:
10057 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10059 if not isinstance(rdict, dict):
10060 raise errors.OpExecError("Can't parse iallocator results: not a dict")
    # TODO: remove backwards compatibility in later versions
10063 if "nodes" in rdict and "result" not in rdict:
10064 rdict["result"] = rdict["nodes"]
10067 for key in "success", "info", "result":
10068 if key not in rdict:
10069 raise errors.OpExecError("Can't parse iallocator results:"
10070 " missing key '%s'" % key)
10071 setattr(self, key, rdict[key])
    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")

    self.out_data = rdict
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  _OP_PARAMS = [
    ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10087 ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10088 ("name", _NoDefault, _TNonEmptyString),
10089 ("nics", _NoDefault, _TOr(_TNone, _TListOf(
10090 _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10091 _TOr(_TNone, _TNonEmptyString))))),
10092 ("disks", _NoDefault, _TOr(_TNone, _TList)),
10093 ("hypervisor", None, _TMaybeString),
10094 ("allocator", None, _TMaybeString),
10095 ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
10096 ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
10097 ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
10098 ("os", None, _TMaybeString),
10099 ("disk_template", None, _TMaybeString),
10100 ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
10103 def CheckPrereq(self):
10104 """Check prerequisites.
    This checks the opcode parameters depending on the test direction and mode.

    """
10109 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10110 for attr in ["mem_size", "disks", "disk_template",
10111 "os", "tags", "nics", "vcpus"]:
10112 if not hasattr(self.op, attr):
10113 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10114 attr, errors.ECODE_INVAL)
10115 iname = self.cfg.ExpandInstanceName(self.op.name)
10116 if iname is not None:
10117 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10118 iname, errors.ECODE_EXISTS)
10119 if not isinstance(self.op.nics, list):
10120 raise errors.OpPrereqError("Invalid parameter 'nics'",
10121 errors.ECODE_INVAL)
10122 if not isinstance(self.op.disks, list):
10123 raise errors.OpPrereqError("Invalid parameter 'disks'",
10124 errors.ECODE_INVAL)
10125 for row in self.op.disks:
10126 if (not isinstance(row, dict) or
10127 "size" not in row or
10128 not isinstance(row["size"], int) or
10129 "mode" not in row or
10130 row["mode"] not in ['r', 'w']):
10131 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10132 " parameter", errors.ECODE_INVAL)
10133 if self.op.hypervisor is None:
10134 self.op.hypervisor = self.cfg.GetHypervisorType()
10135 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10136 fname = _ExpandInstanceName(self.cfg, self.op.name)
10137 self.op.name = fname
10138 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10139 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10140 if not hasattr(self.op, "evac_nodes"):
10141 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10142 " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)
10147 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10148 if self.op.allocator is None:
10149 raise errors.OpPrereqError("Missing allocator name",
10150 errors.ECODE_INVAL)
10151 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10152 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10153 self.op.direction, errors.ECODE_INVAL)
10155 def Exec(self, feedback_fn):
10156 """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text