# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module

import copy
import logging
import time

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd

import ganeti.masterd.instance # pylint: disable-msg=W0611


# Modifiable default values; need to define these here before the
# actual LUs
60 """Returns an empty list.
67 """Returns an empty dict.
75 """Checks if the given value is not None.
78 return val is not None
82 """Checks if the given value is None.
89 """Checks if the given value is a boolean.
92 return isinstance(val, bool)
96 """Checks if the given value is an integer.
99 return isinstance(val, int)
103 """Checks if the given value is a float.
106 return isinstance(val, float)
110 """Checks if the given value is a string.
113 return isinstance(val, basestring)
117 """Checks if a given value evaluates to a boolean True value.

def _TElemOf(target_list):
  """Builds a function that checks if a given value is a member of a list.

  """
  return lambda val: val in target_list
132 """Checks if the given value is a list.
135 return isinstance(val, list)
139 """Checks if the given value is a dictionary.
142 return isinstance(val, dict)
147 """Combine multiple functions using an AND operation.
151 return compat.all(t(val) for t in args)
156 """Combine multiple functions using an AND operation.
160 return compat.any(t(val) for t in args)

# Non-empty string
_TNEString = _TAnd(_TString, _TTrue)
# Non-negative integer
_TPInt = _TAnd(_TInt, lambda v: v >= 0)

def _TListOf(my_type):
  """Checks if a given value is a list with all elements of the same type.

  """
  return _TAnd(_TList,
               lambda lst: compat.all(lst, my_type))


def _TDictOf(key_type, val_type):
  """Checks a dict type for the type of its key/values.

  """
  return _TAnd(_TDict,
               lambda my_dict: (compat.all(my_dict.keys(), key_type) and
                                compat.all(my_dict.values(), val_type)))
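
# A small usage sketch of the combinators above; the name _TSampleDict
# is illustrative only and not used elsewhere in this module.
_TSampleDict = _TDictOf(_TNEString, _TListOf(_TPInt))
assert _TSampleDict({"node1": [0, 2]})       # non-empty keys, int lists
assert not _TSampleDict({"": [1]})           # empty string key fails
assert not _TSampleDict({"node1": [0, -2]})  # negative element fails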


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @cvar _OP_DEFS: a list of opcode attributes and the default values
      they should get if not already existing

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  _OP_DEFS = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    self.tasklets = None

    # Process opcode defaults: attributes listed in _OP_DEFS get their
    # default value if the opcode doesn't define them already
    for aname, aval in self._OP_DEFS:
      if not hasattr(self.op, aname):
        if callable(aval):
          dval = aval()
        else:
          dval = aval
        setattr(self.op, aname, dval)

    # Validate all required opcode parameters against their type checks
    for attr_name, test in self._OP_REQP:
      if not hasattr(op, attr_name):
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)
      attr_val = getattr(op, attr_name, None)
      if not callable(test):
        raise errors.ProgrammerError("Validation for parameter '%s' failed,"
                                     " given type is not a proper type (%s)" %
                                     (attr_name, test))
      if not test(attr_val):
        raise errors.OpPrereqError("Parameter '%s' has invalid type" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()
277 """Returns the SshRunner object
281 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
284 ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_'; that
    prefix will be added by the hooks runner. Also note that additional
    keys will be added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    An absence of nodes should be expressed as an empty list (and not
    None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # "could be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instances' nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
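
# A minimal sketch of how a concrete LU ties the above together
# (illustrative only: the opcode name and parameters below are invented
# for this example and do not exist in Ganeti):
#
#   class LUExampleQueryNodes(LogicalUnit):
#     HPATH = None
#     HTYPE = None
#     _OP_REQP = [("nodes", _TListOf(_TNEString)),
#                 ("use_locking", _TBool)]
#     _OP_DEFS = [("use_locking", False)]
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self.op.nodes = _GetWantedNodes(self, self.op.nodes)
#       self.needed_locks = {locking.LEVEL_NODE: self.op.nodes}
#
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       return self.op.nodes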


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
539 """Tasklet base class.
541 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
542 they can mix legacy code with tasklets. Locking needs to be done in the LU,
543 tasklets know nothing about locks.
545 Subclasses must follow these rules:
546 - Implement CheckPrereq

  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names; must not be empty
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
                                 " non-empty list of nodes whose name is to be"
                                 " expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
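
  # For example (values illustrative):
  #
  #   _GetUpdatedParams({"mem": 512, "vcpus": 2},
  #                     {"mem": constants.VALUE_DEFAULT, "acpi": True})
  #   => {"vcpus": 2, "acpi": True}
  #
  # "mem" is removed so it reverts to its default, and "acpi" is added.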


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance-related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @param memory: the memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: the list of (size, mode) pairs
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance-related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
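
  # Worked example (numbers illustrative): with candidate_pool_size = 10,
  # mc_now = 3 and mc_should = 5, the node being added bumps the target
  # to min(5 + 1, 10) = 6, and since 3 < 6 the node should promote
  # itself to master candidate.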


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
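
  # For example (names illustrative): for an OS whose supported_variants
  # is ["squeeze", "wheezy"], the name "debootstrap+squeeze" is accepted,
  # while "debootstrap" (no variant) or "debootstrap+lenny" (unknown
  # variant) raise OpPrereqError.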


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = [
    ("skip_checks", _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
    ("verbose", _TBool),
    ("error_codes", _TBool),
    ("debug_simulate_errors", _TBool),
    ]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dict of {primary-node: list of instances}, mapping each
        peer primary node to the instances for which this node is
        secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS

    """
    def __init__(self, offline=False, name=None):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + str(item)
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)
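
    # For example (illustrative): with op.error_codes set, an instance
    # error is reported in the machine-parseable form
    #   ERROR:EINSTANCEDOWN:instance:instance1.example.com:<message>
    # while otherwise the same error reads
    #   ERROR: instance instance1.example.com: <message>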

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pvs
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate failovers"
                      " should peer node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(remote_os,
                           lambda v: isinstance(v, list) and len(v) == 7))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
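
    # Each NV_OSLIST entry unpacked above is a 7-element list; one entry
    # could look like this (values illustrative):
    #
    #   ["debootstrap", "/srv/ganeti/os/debootstrap", True, "",
    #    ["default"], [["ARG", "description"]], [15]]
    #
    # i.e. name, path, status, diagnose message, variants, parameters
    # and supported OS API versions.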

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(f_api, lambda v: v >= constants.OS_API_V15)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue

      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue

      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just run in the post phase and their failure makes
    the output be logged in the verify output and the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)
1977 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1978 node_verify_param = {
1979 constants.NV_FILELIST: file_names,
1980 constants.NV_NODELIST: [node.name for node in nodeinfo
1981 if not node.offline],
1982 constants.NV_HYPERVISOR: hypervisors,
1983 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1984 node.secondary_ip) for node in nodeinfo
1985 if not node.offline],
1986 constants.NV_INSTANCELIST: hypervisors,
1987 constants.NV_VERSION: None,
1988 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1989 constants.NV_NODESETUP: None,
1990 constants.NV_TIME: None,
1991 constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      }
1995 if vg_name is not None:
1996 node_verify_param[constants.NV_VGLIST] = None
1997 node_verify_param[constants.NV_LVLIST] = vg_name
1998 node_verify_param[constants.NV_PVLIST] = [vg_name]
1999 node_verify_param[constants.NV_DRBDLIST] = None
2001 # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name))
                      for node in nodeinfo)
2006 for instance in instancelist:
2007 inst_config = instanceinfo[instance]
2009 for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode
2016 inst_config.MapLVsByNode(node_vol_should)
2018 pnode = inst_config.primary_node
2019 node_image[pnode].pinst.append(instance)
2021 for snode in inst_config.secondary_nodes:
2022 nimg = node_image[snode]
2023 nimg.sinst.append(instance)
2024 if pnode not in nimg.sbp:
2025 nimg.sbp[pnode] = []
2026 nimg.sbp[pnode].append(instance)
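    # After this loop, nimg.sbp maps each primary node to the instances
    # that keep a secondary replica on this node, e.g. (illustrative
    # values only):
    #   node_image["node2"].sbp == {"node1": ["inst1", "inst2"]}
    # meaning node2 holds secondaries for two instances whose primary is
    # node1.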
2028 # At this point, we have the in-memory data structures complete,
2029 # except for the runtime information, which we'll gather next
    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping
    # the time before and after executing the request, we can at least have
    # a time window.
2035 nvinfo_starttime = time.time()
2036 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2037 self.cfg.GetClusterName())
2038 nvinfo_endtime = time.time()
2040 all_drbd_map = self.cfg.ComputeDRBDMap()
    feedback_fn("* Verifying node status")
    refos_img = None
    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2068 msg = all_nvinfo[node].fail_msg
2069 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue
2074 nresult = all_nvinfo[node].payload
2076 nimg.call_ok = self._VerifyNode(node_i, nresult)
2077 self._VerifyNodeNetwork(node_i, nresult)
2078 self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
2081 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
2082 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2084 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2085 self._UpdateNodeInstances(node_i, nresult, nimg)
2086 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2087 self._UpdateNodeOS(node_i, nresult, nimg)
2088 if not nimg.os_fail:
        if refos_img is None:
          refos_img = nimg
2091 self._VerifyNodeOS(node_i, nimg, refos_img)
2093 feedback_fn("* Verifying instance status")
2094 for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
2097 inst_config = instanceinfo[instance]
2098 self._VerifyInstance(instance, inst_config, node_image)
2099 inst_nodes_offline = []
2101 pnode = inst_config.primary_node
2102 pnode_img = node_image[pnode]
2103 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2104 self.ENODERPC, pnode, "instance %s, connection to"
2105 " primary node failed", instance)
2107 if pnode_img.offline:
2108 inst_nodes_offline.append(pnode)
      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary, so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
2115 if not inst_config.secondary_nodes:
2116 i_non_redundant.append(instance)
2117 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2118 instance, "instance has multiple secondary nodes: %s",
2119 utils.CommaJoin(inst_config.secondary_nodes),
2120 code=self.ETYPE_WARNING)
2122 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2123 i_non_a_balanced.append(instance)
2125 for snode in inst_config.secondary_nodes:
2126 s_img = node_image[snode]
2127 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2128 "instance %s, connection to secondary node failed", instance)
        if s_img.offline:
          inst_nodes_offline.append(snode)
2133 # warn that the instance lives on offline nodes
2134 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2135 "instance lives on offline node(s) %s",
2136 utils.CommaJoin(inst_nodes_offline))
2137 # ... or ghost nodes
2138 for node in inst_config.all_nodes:
2139 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2140 "instance lives on ghost node %s", node)
2142 feedback_fn("* Verifying orphan volumes")
2143 self._VerifyOrphanVolumes(node_vol_should, node_image)
2145 feedback_fn("* Verifying orphan instances")
2146 self._VerifyOrphanInstances(instancelist, node_image)
2148 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2149 feedback_fn("* Verifying N+1 Memory redundancy")
2150 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2152 feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))
2157 if i_non_a_balanced:
2158 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2159 % len(i_non_a_balanced))
    if n_offline:
      feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
    if n_drained:
      feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
2169 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2170 """Analyze the post-hooks' result
2172 This method analyses the hook result, handles it, and sends some
2173 nicely-formatted feedback back to the user.
2175 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2176 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2177 @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
2179 @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
2186 if phase == constants.HOOKS_PHASE_POST:
2187 # Used to change hooks' output to proper indentation
2188 indent_re = re.compile('^', re.M)
2189 feedback_fn("* Hooks Results")
2190 assert hooks_results, "invalid result from hooks"
2192 for node_name in hooks_results:
2193 res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
2196 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2197 "Communication failure in hooks execution: %s", msg)
2198 if res.offline or msg:
2199 # No need to investigate payload if node is offline or gave an error.
          # manually override lu_result here, as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
2204 for script, hkr, output in res.payload:
2205 test = hkr == constants.HKR_FAIL
2206 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2207 "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result
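  # Worked example of the indentation fix above (illustrative only): with
  # indent_re matching the start of every line (re.M), a two-line hook
  # output such as "ok\nwarn" has spaces prepended to each line by
  # indent_re.sub() before being passed to feedback_fn, which keeps
  # per-script output visually nested under the "* Hooks Results" header.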
2216 class LUVerifyDisks(NoHooksLU):
2217 """Verifies the cluster disks status.
2223 def ExpandNames(self):
2224 self.needed_locks = {
2225 locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      }
2228 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2230 def Exec(self, feedback_fn):
2231 """Verify integrity of cluster disks.
2233 @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
2239 result = res_nodes, res_instances, res_missing = {}, [], {}
2241 vg_name = self.cfg.GetVGName()
2242 nodes = utils.NiceSort(self.cfg.GetNodeList())
2243 instances = [self.cfg.GetInstanceInfo(name)
2244 for name in self.cfg.GetInstanceList()]
    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
2253 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2254 for node, vol_list in inst_lvs.iteritems():
2255 for vol in vol_list:
2256 nv_dict[(node, vol)] = inst
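    # Example of the transformation above (hypothetical names): starting
    # from inst_lvs == {"node1": ["xenvg/lv1", "xenvg/lv2"]} for instance
    # "inst1", nv_dict ends up as
    #   {("node1", "xenvg/lv1"): inst1, ("node1", "xenvg/lv2"): inst1}
    # which makes the later per-node LV lookup a single dict access.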
2261 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue
2274 lvs = node_res.payload
2275 for lv_name, (_, _, lv_online) in lvs.items():
2276 inst = nv_dict.pop((node, lv_name), None)
2277 if (not lv_online and inst is not None
2278 and inst.name not in res_instances):
2279 res_instances.append(inst.name)
    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
2283 for key, inst in nv_dict.iteritems():
2284 if inst.name not in res_missing:
2285 res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
2291 class LURepairDiskSizes(NoHooksLU):
2292 """Verifies the cluster disks sizes.
2295 _OP_REQP = [("instances", _TListOf(_TNEString))]
2298 def ExpandNames(self):
2299 if self.op.instances:
2300 self.wanted_names = []
2301 for name in self.op.instances:
2302 full_name = _ExpandInstanceName(self.cfg, name)
2303 self.wanted_names.append(full_name)
2304 self.needed_locks = {
2305 locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
2311 self.needed_locks = {
2312 locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
2315 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2317 def DeclareLocks(self, level):
2318 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2319 self._LockInstancesNodes(primary_only=True)
2321 def CheckPrereq(self):
2322 """Check prerequisites.
2324 This only checks the optional instance list against the existing names.
2327 if self.wanted_names is None:
2328 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2330 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2331 in self.wanted_names]
2333 def _EnsureChildSizes(self, disk):
2334 """Ensure children of the disk have the needed disk size.
2336 This is valid mainly for DRBD8 and fixes an issue where the
2337 children have smaller disk size.
2339 @param disk: an L{ganeti.objects.Disk} object
2342 if disk.dev_type == constants.LD_DRBD8:
2343 assert disk.children, "Empty children for DRBD8?"
2344 fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size
2351 # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
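  # Illustrative example (made-up sizes): for a DRBD8 disk of size 10240
  # whose data child has size 10200, the child is grown to 10240 and the
  # call returns True, so the caller knows the configuration was changed
  # and needs to be written back.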
2356 def Exec(self, feedback_fn):
2357 """Verify the size of cluster disks.
2360 # TODO: check child disks too
2361 # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
2364 pnode = instance.primary_node
2365 if pnode not in per_node_disks:
2366 per_node_disks[pnode] = []
2367 for idx, disk in enumerate(instance.disks):
2368 per_node_disks[pnode].append((instance, idx, disk))
    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
2380 if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
2400 changed.append((instance.name, idx, size))
2401 if self._EnsureChildSizes(disk):
2402 self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
2407 class LURenameCluster(LogicalUnit):
2408 """Rename the cluster.
2411 HPATH = "cluster-rename"
2412 HTYPE = constants.HTYPE_CLUSTER
2413 _OP_REQP = [("name", _TNEString)]
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
2423 mn = self.cfg.GetMasterNode()
2424 all_nodes = self.cfg.GetNodeList()
2425 return env, [mn], all_nodes
2427 def CheckPrereq(self):
2428 """Verify that the passed name is a valid one.
2431 hostname = utils.GetHostInfo(self.op.name)
2433 new_name = hostname.name
2434 self.ip = new_ip = hostname.ip
2435 old_name = self.cfg.GetClusterName()
2436 old_ip = self.cfg.GetMasterIP()
2437 if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
2441 if new_ip != old_ip:
2442 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2443 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2444 " reachable on the network. Aborting." %
2445 new_ip, errors.ECODE_NOTUNIQUE)
2447 self.op.name = new_name
2449 def Exec(self, feedback_fn):
2450 """Rename the cluster.
    clustername = self.op.name
    ip = self.ip
2456 # shutdown the master IP
2457 master = self.cfg.GetMasterNode()
2458 result = self.rpc.call_node_stop_master(master, False)
2459 result.Raise("Could not disable the master role")
    try:
      cluster = self.cfg.GetClusterInfo()
2463 cluster.cluster_name = clustername
2464 cluster.master_ip = ip
2465 self.cfg.Update(cluster, feedback_fn)
2467 # update the known hosts file
2468 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2469 node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
2474 result = self.rpc.call_upload_file(node_list,
2475 constants.SSH_KNOWN_HOSTS_FILE)
2476 for to_node, to_result in result.iteritems():
2477 msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)
2491 def _RecursiveCheckIfLVMBased(disk):
2492 """Check if the given disk or its children are lvm-based.
2494 @type disk: L{objects.Disk}
2495 @param disk: the disk to check
2497 @return: boolean indicating whether a LD_LV dev_type was found or not
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV
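# A few illustrative cases: a plain LD_LV disk returns True directly; a
# DRBD8 disk whose children are logical volumes returns True via the
# recursion; a purely file-based disk (no LV anywhere in the tree)
# returns False.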
2507 class LUSetClusterParams(LogicalUnit):
2508 """Change the parameters of the cluster.
2511 HPATH = "cluster-modify"
2512 HTYPE = constants.HTYPE_CLUSTER
2514 ("hvparams", _TOr(_TDictOf(_TNEString, _TDict), _TNone)),
2515 ("os_hvp", _TOr(_TDictOf(_TNEString, _TDict), _TNone)),
2516 ("osparams", _TOr(_TDictOf(_TNEString, _TDict), _TNone)),
2517 ("enabled_hypervisors",
2518 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2521 ("candidate_pool_size", None),
2524 ("remove_uids", None),
  def CheckArguments(self):
    """Check parameters

    """
    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2537 except (ValueError, TypeError), err:
2538 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2539 str(err), errors.ECODE_INVAL)
2540 if self.op.candidate_pool_size < 1:
      raise errors.OpPrereqError("At least one master candidate needed",
                                 errors.ECODE_INVAL)
2544 _CheckBooleanOpField(self.op, "maintain_node_health")
2546 if self.op.uid_pool:
2547 uidpool.CheckUidPool(self.op.uid_pool)
2549 if self.op.add_uids:
2550 uidpool.CheckUidPool(self.op.add_uids)
2552 if self.op.remove_uids:
2553 uidpool.CheckUidPool(self.op.remove_uids)
2555 def ExpandNames(self):
2556 # FIXME: in the future maybe other cluster params won't require checking on
2557 # all nodes to be modified.
2558 self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
2561 self.share_locks[locking.LEVEL_NODE] = 1
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
2571 mn = self.cfg.GetMasterNode()
2572 return env, [mn], [mn]
2574 def CheckPrereq(self):
2575 """Check prerequisites.
2577 This checks whether the given params don't conflict and
2578 if the given volume group is valid.
2581 if self.op.vg_name is not None and not self.op.vg_name:
2582 instances = self.cfg.GetAllInstancesInfo().values()
2583 for inst in instances:
2584 for disk in inst.disks:
2585 if _RecursiveCheckIfLVMBased(disk):
2586 raise errors.OpPrereqError("Cannot disable lvm storage while"
2587 " lvm-based instances exist",
2590 node_list = self.acquired_locks[locking.LEVEL_NODE]
    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2609 self.cluster = cluster = self.cfg.GetClusterInfo()
2610 # validate params changes
2611 if self.op.beparams:
2612 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2613 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2615 if self.op.nicparams:
2616 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2617 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2618 objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
2622 for instance in self.cfg.GetAllInstancesInfo().values():
2623 for nic_idx, nic in enumerate(instance.nics):
2624 params_copy = copy.deepcopy(nic.nicparams)
2625 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2627 # check parameter syntax
2629 objects.NIC.CheckParameterSyntax(params_filled)
2630 except errors.ConfigurationError, err:
2631 nic_errors.append("Instance %s, nic/%d: %s" %
2632 (instance.name, nic_idx, err))
2634 # if we're moving instances to routed, check that they have an ip
2635 target_mode = params_filled[constants.NIC_MODE]
2636 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2638 (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))
2643 # hypervisor list/parameters
2644 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2645 if self.op.hvparams:
2646 for hv_name, hv_dict in self.op.hvparams.items():
2647 if hv_name not in self.new_hvparams:
2648 self.new_hvparams[hv_name] = hv_dict
2650 self.new_hvparams[hv_name].update(hv_dict)
2652 # os hypervisor parameters
2653 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
2656 if os_name not in self.new_os_hvp:
2657 self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
2660 if hv_name not in self.new_os_hvp[os_name]:
2661 self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2666 self.new_osp = objects.FillDict(cluster.osparams, {})
2667 if self.op.osparams:
2668 for os_name, osp in self.op.osparams.items():
2669 if os_name not in self.new_osp:
2670 self.new_osp[os_name] = {}
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)
2675 if not self.new_osp[os_name]:
2676 # we removed all parameters
2677 del self.new_osp[os_name]
2679 # check the parameter validity (remote check)
2680 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2681 os_name, self.new_osp[os_name])
2683 # changes to the hypervisor list
2684 if self.op.enabled_hypervisors is not None:
2685 self.hv_list = self.op.enabled_hypervisors
2686 for hv in self.hv_list:
2687 # if the hypervisor doesn't already exist in the cluster
2688 # hvparams, we initialize it to empty, and then (in both
2689 # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2695 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors
2699 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2700 # either the enabled list has changed, or the parameters have, validate
2701 for hv_name, hv_params in self.new_hvparams.items():
2702 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2703 (self.op.enabled_hypervisors and
2704 hv_name in self.op.enabled_hypervisors)):
2705 # either this is a new hypervisor, or its parameters have changed
2706 hv_class = hypervisor.GetHypervisor(hv_name)
2707 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2708 hv_class.CheckParameterSyntax(hv_params)
2709 _CheckHVParams(self, node_list, hv_name, hv_params)
    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
2714 for os_name, os_hvp in self.new_os_hvp.items():
2715 for hv_name, hv_params in os_hvp.items():
2716 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2717 # we need to fill in the new os_hvp on top of the actual hv_p
2718 cluster_defaults = self.new_hvparams.get(hv_name, {})
2719 new_osp = objects.FillDict(cluster_defaults, hv_params)
2720 hv_class = hypervisor.GetHypervisor(hv_name)
2721 hv_class.CheckParameterSyntax(new_osp)
2722 _CheckHVParams(self, node_list, hv_name, new_osp)
2725 def Exec(self, feedback_fn):
2726 """Change the parameters of the cluster.
2729 if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
2738 if self.op.hvparams:
2739 self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
2742 if self.op.enabled_hypervisors is not None:
2743 self.cluster.hvparams = self.new_hvparams
2744 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2745 if self.op.beparams:
2746 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2747 if self.op.nicparams:
2748 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2749 if self.op.osparams:
2750 self.cluster.osparams = self.new_osp
2752 if self.op.candidate_pool_size is not None:
2753 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2754 # we need to update the pool size here, otherwise the save will fail
2755 _AdjustCandidatePool(self, [])
2757 if self.op.maintain_node_health is not None:
2758 self.cluster.maintain_node_health = self.op.maintain_node_health
2760 if self.op.add_uids is not None:
2761 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2763 if self.op.remove_uids is not None:
2764 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2766 if self.op.uid_pool is not None:
2767 self.cluster.uid_pool = self.op.uid_pool
2769 self.cfg.Update(self.cluster, feedback_fn)
2772 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2773 """Distribute additional files which are part of the cluster configuration.
2775 ConfigWriter takes care of distributing the config and ssconf files, but
2776 there are more files which should be distributed to all nodes. This function
2777 makes sure those are copied.
2779 @param lu: calling logical unit
2780 @param additional_nodes: list of nodes not in the config to distribute to
2783 # 1. Gather target nodes
2784 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2785 dist_nodes = lu.cfg.GetOnlineNodeList()
2786 if additional_nodes is not None:
2787 dist_nodes.extend(additional_nodes)
2788 if myself.name in dist_nodes:
2789 dist_nodes.remove(myself.name)
2791 # 2. Gather files to distribute
2792 dist_files = set([constants.ETC_HOSTS,
2793 constants.SSH_KNOWN_HOSTS_FILE,
2794 constants.RAPI_CERT_FILE,
2795 constants.RAPI_USERS_FILE,
2796 constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                    ])
2800 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2801 for hv_name in enabled_hypervisors:
2802 hv_class = hypervisor.GetHypervisor(hv_name)
2803 dist_files.update(hv_class.GetAncillaryFiles())
2805 # 3. Perform the files upload
2806 for fname in dist_files:
2807 if os.path.exists(fname):
2808 result = lu.rpc.call_upload_file(dist_nodes, fname)
2809 for to_node, to_result in result.items():
2810 msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)
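# Typical callers (both in this module): LURedistributeConfig invokes
# _RedistributeAncillaryFiles(self) after a config update, and LUAddNode
# uses the additional_nodes parameter to push the files to a node that is
# not yet part of the configuration:
#   _RedistributeAncillaryFiles(self, additional_nodes=[node])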
2817 class LURedistributeConfig(NoHooksLU):
2818 """Force the redistribution of cluster configuration.
  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False
2826 def ExpandNames(self):
2827 self.needed_locks = {
2828 locking.LEVEL_NODE: locking.ALL_SET,
2830 self.share_locks[locking.LEVEL_NODE] = 1
2832 def Exec(self, feedback_fn):
2833 """Redistribute the configuration.
2836 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2837 _RedistributeAncillaryFiles(self)
2840 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2841 """Sleep and poll for an instance's disk to sync.
2844 if not instance.disks or disks is not None and not disks:
2847 disks = _ExpandCheckDisks(instance, disks)
2850 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry
  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
2865 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2866 msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    retries = 0
    rstats = rstats.payload
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue
2883 cumul_degraded = (cumul_degraded or
2884 (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
2887 if mstat.estimated_time is not None:
2888 rem_time = ("%s remaining (estimated)" %
2889 utils.FormatSeconds(mstat.estimated_time))
2890 max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
2893 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2894 (disks[i].iv_name, mstat.sync_percent, rem_time))
2896 # if we're done but degraded, let's do a few small retries, to
2897 # make sure we see a stable and not transient situation; therefore
2898 # we force restart of the loop
2899 if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2912 return not cumul_degraded
2915 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2916 """Check that mirrors are not degraded.
2918 The ldisk parameter, if True, will change the test from the
2919 is_degraded attribute (which represents overall non-ok status for
2920 the device(s)) to the ldisk (representing the local storage status).
  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded
  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
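# Usage sketch (hypothetical arguments): a replace-disks style flow would
# call
#   _CheckDiskConsistency(lu, dev, node, on_primary=False, ldisk=True)
# to require LDS_OKAY on the local storage of a secondary, while plain
# consistency checks leave ldisk at False and only test is_degraded.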
2949 class LUDiagnoseOS(NoHooksLU):
2950 """Logical unit for OS diagnose/query.
2954 ("output_fields", _TListOf(_TNEString)),
2955 ("names", _TListOf(_TNEString)),
2958 _FIELDS_STATIC = utils.FieldSet()
2959 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
2960 "parameters", "api_versions")
2962 def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)
2967 _CheckOutputFields(static=self._FIELDS_STATIC,
2968 dynamic=self._FIELDS_DYNAMIC,
2969 selected=self.op.output_fields)
2971 def ExpandNames(self):
2972 # Lock all nodes, in shared mode
2973 # Temporary removal of locks, should be reverted later
2974 # TODO: reintroduce locks when they are lighter-weight
2975 self.needed_locks = {}
2976 #self.share_locks[locking.LEVEL_NODE] = 1
2977 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
  @staticmethod
  def _DiagnoseByOS(rlist):
2981 """Remaps a per-node return list into an a per-os per-node dictionary
2983 @param rlist: a map with node names as keys and OS objects as values
2986 @return: a dictionary with osnames as keys and as value another
2987 map, with nodes as keys and tuples of (path, status, diagnose,
2988 variants, parameters, api_versions) as values, eg::
2990 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
2991 (/srv/..., False, "invalid api")],
2992 "node2": [(/srv/..., True, "", [], [])]}
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
2998 # level), so that nodes with a non-responding node daemon don't
2999 # make all OSes invalid
3000 good_nodes = [node_name for node_name in rlist
3001 if not rlist[node_name].fail_msg]
3002 for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
3005 for (name, path, status, diagnose, variants,
3006 params, api_versions) in nr.payload:
3007 if name not in all_os:
3008 # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
3011 for nname in good_nodes:
3012 all_os[name][nname] = []
3013 # convert params from [name, help] to (name, help)
3014 params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))

    return all_os
3019 def Exec(self, feedback_fn):
3020 """Compute the list of OSes.
3023 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3024 node_data = self.rpc.call_os_diagnose(valid_nodes)
3025 pol = self._DiagnoseByOS(node_data)
    output = []
    for os_name, os_data in pol.items():
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
3032 for idx, osl in enumerate(os_data.values()):
3033 valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
3037 node_variants, node_params, node_api = osl[0][3:6]
3038 if idx == 0: # first entry
3039 variants = set(node_variants)
3040 params = set(node_params)
3041 api_versions = set(node_api)
3042 else: # keep consistency
3043 variants.intersection_update(node_variants)
3044 params.intersection_update(node_params)
3045 api_versions.intersection_update(node_api)
      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = list(variants)
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
3071 class LURemoveNode(LogicalUnit):
3072 """Logical unit for removing a node.
3075 HPATH = "node-remove"
3076 HTYPE = constants.HTYPE_NODE
3077 _OP_REQP = [("node_name", _TNEString)]
3079 def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
3083 node would then be impossible to remove.
3087 "OP_TARGET": self.op.node_name,
3088 "NODE_NAME": self.op.node_name,
3090 all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
3096 return env, all_nodes, all_nodes
3098 def CheckPrereq(self):
3099 """Check prerequisites.
    This checks:
     - the node exists in the configuration
3103 - it does not have primary or secondary instances
3104 - it's not the master
3106 Any errors are signaled by raising errors.OpPrereqError.
3109 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3110 node = self.cfg.GetNodeInfo(self.op.node_name)
3111 assert node is not None
3113 instance_list = self.cfg.GetInstanceList()
3115 masternode = self.cfg.GetMasterNode()
3116 if node.name == masternode:
3117 raise errors.OpPrereqError("Node is the master node,"
3118 " you need to failover first.",
3121 for instance_name in instance_list:
3122 instance = self.cfg.GetInstanceInfo(instance_name)
3123 if node.name in instance.all_nodes:
3124 raise errors.OpPrereqError("Instance %s is still running on the node,"
3125 " please remove first." % instance_name,
    self.op.node_name = node.name
    self.node = node
3130 def Exec(self, feedback_fn):
3131 """Removes the node from the cluster.
3135 logging.info("Stopping the node daemon and removing configs from node %s",
3138 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3140 # Promote nodes to master candidate as needed
3141 _AdjustCandidatePool(self, exceptions=[node.name])
3142 self.context.RemoveNode(node.name)
3144 # Run post hooks on the node before it's removed
3145 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
3152 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3153 msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)
3158 # Remove node from our /etc/hosts
3159 if self.cfg.GetClusterInfo().modify_etc_hosts:
3160 # FIXME: this should be done via an rpc call to node daemon
3161 utils.RemoveHostFromEtcHosts(node.name)
3162 _RedistributeAncillaryFiles(self)
3165 class LUQueryNodes(NoHooksLU):
3166 """Logical unit for querying nodes.
3169 # pylint: disable-msg=W0142
3171 ("output_fields", _TListOf(_TNEString)),
3172 ("names", _TListOf(_TNEString)),
3173 ("use_locking", _TBool),
3177 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3178 "master_candidate", "offline", "drained"]
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )
3187 _FIELDS_STATIC = utils.FieldSet(*[
3188 "pinst_cnt", "sinst_cnt",
3189 "pinst_list", "sinst_list",
3190 "pip", "sip", "tags",
3192 "role"] + _SIMPLE_FIELDS
3195 def CheckArguments(self):
3196 _CheckOutputFields(static=self._FIELDS_STATIC,
3197 dynamic=self._FIELDS_DYNAMIC,
3198 selected=self.op.output_fields)
3200 def ExpandNames(self):
3201 self.needed_locks = {}
3202 self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET
3209 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3210 self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
3215 def Exec(self, feedback_fn):
3216 """Computes the list of nodes and their attributes.
3219 all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
3222 elif self.wanted != locking.ALL_SET:
3223 nodenames = self.wanted
3224 missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()
3231 nodenames = utils.NiceSort(nodenames)
3232 nodelist = [all_info[name] for name in nodenames]
3234 # begin data gathering
3236 if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3239 self.cfg.GetHypervisorType())
3240 for name in nodenames:
3241 nodeinfo = node_data[name]
3242 if not nodeinfo.fail_msg and nodeinfo.payload:
3243 nodeinfo = nodeinfo.payload
3244 fn = utils.TryConvert
3246 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3247 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3248 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3249 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3250 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3251 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3252 "bootid": nodeinfo.get('bootid', None),
3253 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3254 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3257 live_data[name] = {}
3259 live_data = dict.fromkeys(nodenames, {})
3261 node_to_primary = dict([(name, set()) for name in nodenames])
3262 node_to_secondary = dict([(name, set()) for name in nodenames])
3264 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3265 "sinst_cnt", "sinst_list"))
3266 if inst_fields & frozenset(self.op.output_fields):
3267 inst_data = self.cfg.GetAllInstancesInfo()
3269 for inst in inst_data.values():
3270 if inst.primary_node in node_to_primary:
3271 node_to_primary[inst.primary_node].add(inst.name)
3272 for secnode in inst.secondary_nodes:
3273 if secnode in node_to_secondary:
3274 node_to_secondary[secnode].add(inst.name)
3276 master_node = self.cfg.GetMasterNode()
3278 # end data gathering
    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
3284 if field in self._SIMPLE_FIELDS:
3285 val = getattr(node, field)
3286 elif field == "pinst_list":
3287 val = list(node_to_primary[node.name])
3288 elif field == "sinst_list":
3289 val = list(node_to_secondary[node.name])
3290 elif field == "pinst_cnt":
3291 val = len(node_to_primary[node.name])
3292 elif field == "sinst_cnt":
3293 val = len(node_to_secondary[node.name])
3294 elif field == "pip":
3295 val = node.primary_ip
3296 elif field == "sip":
3297 val = node.secondary_ip
3298 elif field == "tags":
3299 val = list(node.GetTags())
3300 elif field == "master":
3301 val = node.name == master_node
3302 elif self._FIELDS_DYNAMIC.Matches(field):
3303 val = live_data[node.name].get(field, None)
3304 elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
3317 node_output.append(val)
      output.append(node_output)

    return output
3323 class LUQueryNodeVolumes(NoHooksLU):
3324 """Logical unit for getting volumes on node(s).
3328 ("nodes", _TListOf(_TNEString)),
3329 ("output_fields", _TListOf(_TNEString)),
3332 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3333 _FIELDS_STATIC = utils.FieldSet("node")
3335 def CheckArguments(self):
3336 _CheckOutputFields(static=self._FIELDS_STATIC,
3337 dynamic=self._FIELDS_DYNAMIC,
3338 selected=self.op.output_fields)
3340 def ExpandNames(self):
3341 self.needed_locks = {}
3342 self.share_locks[locking.LEVEL_NODE] = 1
3343 if not self.op.nodes:
3344 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
3349 def Exec(self, feedback_fn):
3350 """Computes the list of nodes and their attributes.
3353 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3354 volumes = self.rpc.call_node_volumes(nodenames)
3356 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3357 in self.cfg.GetInstanceList()]
3359 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
3372 node_vols.sort(key=lambda vol: vol['dev'])
      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                break
            else:
              inst = None
            val = inst and inst.name
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))
        output.append(node_output)

    return output
3405 class LUQueryNodeStorage(NoHooksLU):
3406 """Logical unit for getting information on storage units on node(s).
3409 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3411 ("nodes", _TListOf(_TNEString)),
3412 ("storage_type", _CheckStorageType),
3413 ("output_fields", _TListOf(_TNEString)),
3415 _OP_DEFS = [("name", None)]
3418 def CheckArguments(self):
3419 _CheckOutputFields(static=self._FIELDS_STATIC,
3420 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3421 selected=self.op.output_fields)
3423 def ExpandNames(self):
3424 self.needed_locks = {}
3425 self.share_locks[locking.LEVEL_NODE] = 1
3428 self.needed_locks[locking.LEVEL_NODE] = \
3429 _GetWantedNodes(self, self.op.nodes)
3431 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3433 def Exec(self, feedback_fn):
3434 """Computes the list of nodes and their attributes.
3437 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3439 # Always get name to sort by
3440 if constants.SF_NAME in self.op.output_fields:
3441 fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields
3445 # Never ask for node or type as it's only known to the LU
3446 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3447 while extra in fields:
3448 fields.remove(extra)
3450 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3451 name_idx = field_idx[constants.SF_NAME]
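    # Illustration (made-up field list): with fields == ["name", "size"],
    # field_idx == {"name": 0, "size": 1} and name_idx == 0, so each
    # result row can be keyed and sorted by its storage unit name below.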
3453 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3454 data = self.rpc.call_storage_list(self.nodes,
3455 self.op.storage_type, st_args,
3456 self.op.name, fields)
    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue
3470 rows = dict([(row[name_idx], row) for row in nresult.payload])
      for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        out = []
        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)
          out.append(val)
        result.append(out)

    return result
3494 class LUModifyNodeStorage(NoHooksLU):
3495 """Logical unit for modifying a storage volume on a node.
3499 ("node_name", _TNEString),
3500 ("storage_type", _CheckStorageType),
3501 ("name", _TNEString),
3502 ("changes", _TDict),
3506 def CheckArguments(self):
3507 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3509 storage_type = self.op.storage_type
    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' cannot be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)
3518 diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)
3525 def ExpandNames(self):
3526 self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }
3530 def Exec(self, feedback_fn):
3531 """Computes the list of nodes and their attributes.
3534 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3535 result = self.rpc.call_storage_modify(self.op.node_name,
3536 self.op.storage_type, st_args,
3537 self.op.name, self.op.changes)
3538 result.Raise("Failed to modify storage unit '%s' on %s" %
3539 (self.op.name, self.op.node_name))
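  # Illustrative example (the allowed fields depend on the storage
  # backend): for an lvm-pv storage unit,
  # constants.MODIFIABLE_STORAGE_FIELDS typically only permits the
  # "allocatable" flag, so a valid opcode would carry something like
  # changes={"allocatable": False} for the given PV name.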
3542 class LUAddNode(LogicalUnit):
3543 """Logical unit for adding node to the cluster.
3547 HTYPE = constants.HTYPE_NODE
3549 ("node_name", _TNEString),
3551 _OP_DEFS = [("secondary_ip", None)]
3553 def CheckArguments(self):
3554 # validate/normalize the node name
3555 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3557 def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.
3564 "OP_TARGET": self.op.node_name,
3565 "NODE_NAME": self.op.node_name,
3566 "NODE_PIP": self.op.primary_ip,
3567 "NODE_SIP": self.op.secondary_ip,
3569 nodes_0 = self.cfg.GetNodeList()
3570 nodes_1 = nodes_0 + [self.op.node_name, ]
3571 return env, nodes_0, nodes_1
3573 def CheckPrereq(self):
3574 """Check prerequisites.
    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg
3587 dns_data = utils.GetHostInfo(node_name)
3589 node = dns_data.name
3590 primary_ip = self.op.primary_ip = dns_data.ip
3591 if self.op.secondary_ip is None:
3592 self.op.secondary_ip = primary_ip
3593 if not utils.IsValidIP(self.op.secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
3596 secondary_ip = self.op.secondary_ip
3598 node_list = cfg.GetNodeList()
3599 if not self.op.readd and node in node_list:
3600 raise errors.OpPrereqError("Node %s is already in the configuration" %
3601 node, errors.ECODE_EXISTS)
3602 elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)
3606 self.changed_primary_ip = False
3608 for existing_node_name in node_list:
3609 existing_node = cfg.GetNodeInfo(existing_node_name)
3611 if self.op.readd and node == existing_node_name:
3612 if existing_node.secondary_ip != secondary_ip:
3613 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3614 " address configuration as before",
3616 if existing_node.primary_ip != primary_ip:
3617 self.changed_primary_ip = True
3621 if (existing_node.primary_ip == primary_ip or
3622 existing_node.secondary_ip == primary_ip or
3623 existing_node.primary_ip == secondary_ip or
3624 existing_node.secondary_ip == secondary_ip):
3625 raise errors.OpPrereqError("New node ip address(es) conflict with"
3626 " existing node %s" % existing_node.name,
3627 errors.ECODE_NOTUNIQUE)
3629 # check that the type of the node (single versus dual homed) is the
3630 # same as for the master
3631 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3632 master_singlehomed = myself.secondary_ip == myself.primary_ip
3633 newbie_singlehomed = secondary_ip == primary_ip
3634 if master_singlehomed != newbie_singlehomed:
3635 if master_singlehomed:
3636 raise errors.OpPrereqError("The master has no private ip but the"
3637 " new node has one",
3640 raise errors.OpPrereqError("The master has a private ip but the"
3641 " new node doesn't have one",
3644 # checks reachability
3645 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3646 raise errors.OpPrereqError("Node not reachable by ping",
3647 errors.ECODE_ENVIRON)
3649 if not newbie_singlehomed:
3650 # check reachability from my secondary ip to newbie's secondary ip
3651 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3652 source=myself.secondary_ip):
3653 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3654 " based ping to noded port",
3655 errors.ECODE_ENVIRON)
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
3669 primary_ip=primary_ip,
3670 secondary_ip=secondary_ip,
3671 master_candidate=self.master_candidate,
3672 offline=False, drained=False)
3674 def Exec(self, feedback_fn):
3675 """Adds the new node to the cluster.
3678 new_node = self.new_node
3679 node = new_node.name
3681 # for re-adds, reset the offline/drained/master-candidate flags;
3682 # we need to reset here, otherwise offline would prevent RPC calls
3683 # later in the procedure; this also means that if the re-add
3684 # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip
3693 # notify the user about any possible mc promotion
3694 if new_node.master_candidate:
3695 self.LogInfo("Node will be a master candidate")
3697 # check connectivity
3698 result = self.rpc.call_version([node])[node]
3699 result.Raise("Can't get version information from node %s" % node)
3700 if constants.PROTOCOL_VERSION == result.payload:
3701 logging.info("Communication to node %s fine, sw version %s match",
3702 node, result.payload)
3704 raise errors.OpExecError("Version mismatch master version %s,"
3705 " node version %s" %
3706 (constants.PROTOCOL_VERSION, result.payload))
3709 if self.cfg.GetClusterInfo().modify_ssh_setup:
3710 logging.info("Copy ssh key to node %s", node)
3711 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3713 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      keyarray = []
      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
3723 result.Raise("Cannot transfer ssh keys to the new node")
3725 # Add node to our /etc/hosts, and add key to known_hosts
3726 if self.cfg.GetClusterInfo().modify_etc_hosts:
3727 # FIXME: this should be done via an rpc call to node daemon
3728 utils.AddHostToEtcHosts(new_node.name)
3730 if new_node.secondary_ip != new_node.primary_ip:
3731 result = self.rpc.call_node_has_ip_address(new_node.name,
3732 new_node.secondary_ip)
3733 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3734 prereq=True, ecode=errors.ECODE_ENVIRON)
3735 if not result.payload:
3736 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3737 " you gave (%s). Please fix and re-run this"
3738 " command." % new_node.secondary_ip)
3740 node_verify_list = [self.cfg.GetMasterNode()]
3741 node_verify_param = {
3742 constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
      }
3746 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3747 self.cfg.GetClusterName())
3748 for verifier in node_verify_list:
3749 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3750 nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")
    if self.op.readd:
      _RedistributeAncillaryFiles(self)
3760 self.context.ReaddNode(new_node)
3761 # make sure we redistribute the config
3762 self.cfg.Update(new_node, feedback_fn)
3763 # and make sure the new node will not have old files around
3764 if not new_node.master_candidate:
3765 result = self.rpc.call_node_demote_from_mc(new_node.name)
3766 msg = result.fail_msg
3768 self.LogWarning("Node failed to demote itself from master"
3769 " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3772 self.context.AddNode(new_node, self.proc.GetECId())
3775 class LUSetNodeParams(LogicalUnit):
3776 """Modifies the parameters of a node.
3779 HPATH = "node-modify"
3780 HTYPE = constants.HTYPE_NODE
3781 _OP_REQP = [("node_name", _TNEString)]
3784 def CheckArguments(self):
3785 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3786 _CheckBooleanOpField(self.op, 'master_candidate')
3787 _CheckBooleanOpField(self.op, 'offline')
3788 _CheckBooleanOpField(self.op, 'drained')
3789 _CheckBooleanOpField(self.op, 'auto_promote')
3790 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3791 if all_mods.count(None) == 3:
3792 raise errors.OpPrereqError("Please pass at least one modification",
3794 if all_mods.count(True) > 1:
3795 raise errors.OpPrereqError("Can't set the node into more than one"
3796 " state at the same time",
3799 # Boolean value that tells us whether we're offlining or draining the node
3800 self.offline_or_drain = (self.op.offline == True or
3801 self.op.drained == True)
3802 self.deoffline_or_drain = (self.op.offline == False or
3803 self.op.drained == False)
3804 self.might_demote = (self.op.master_candidate == False or
3805 self.offline_or_drain)
3807 self.lock_all = self.op.auto_promote and self.might_demote
3810 def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3816 def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
3823 "OP_TARGET": self.op.node_name,
3824 "MASTER_CANDIDATE": str(self.op.master_candidate),
3825 "OFFLINE": str(self.op.offline),
3826 "DRAINED": str(self.op.drained),
3828 nl = [self.cfg.GetMasterNode(),
3832 def CheckPrereq(self):
3833 """Check prerequisites.
3835 This only checks the instance list against the existing names.
3838 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3840 if (self.op.master_candidate is not None or
3841 self.op.drained is not None or
3842 self.op.offline is not None):
3843 # we can't change the master's node flags
3844 if self.op.node_name == self.cfg.GetMasterNode():
3845 raise errors.OpPrereqError("The master role can be changed"
3846 " only via masterfailover",
3850 if node.master_candidate and self.might_demote and not self.lock_all:
3851 assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
3854 (mc_remaining, mc_should, _) = \
3855 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3856 if mc_remaining < mc_should:
3857 raise errors.OpPrereqError("Not enough master candidates, please"
3858 " pass auto_promote to allow promotion",
3861 if (self.op.master_candidate == True and
3862 ((node.offline and not self.op.offline == False) or
3863 (node.drained and not self.op.drained == False))):
3864 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3865 " to master_candidate" % node.name,
3868 # If we're being deofflined/drained, we'll MC ourself if needed
3869 if (self.deoffline_or_drain and not self.offline_or_drain and not
3870 self.op.master_candidate == True and not node.master_candidate):
3871 self.op.master_candidate = _DecideSelfPromotion(self)
3872 if self.op.master_candidate:
3873 self.LogInfo("Autopromoting node to master candidate")
  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    result = []
3886 if self.op.offline is not None:
3887 node.offline = self.op.offline
3888 result.append(("offline", str(self.op.offline)))
3889 if self.op.offline == True:
3890 if node.master_candidate:
3891 node.master_candidate = False
3893 result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
3896 result.append(("drained", "clear drained status due to offline"))
3898 if self.op.master_candidate is not None:
3899 node.master_candidate = self.op.master_candidate
3901 result.append(("master_candidate", str(self.op.master_candidate)))
3902 if self.op.master_candidate == False:
3903 rrc = self.rpc.call_node_demote_from_mc(node.name)
3906 self.LogWarning("Node failed to demote itself: %s" % msg)
3908 if self.op.drained is not None:
3909 node.drained = self.op.drained
3910 result.append(("drained", str(self.op.drained)))
3911 if self.op.drained == True:
3912 if node.master_candidate:
3913 node.master_candidate = False
3915 result.append(("master_candidate", "auto-demotion due to drain"))
3916 rrc = self.rpc.call_node_demote_from_mc(node.name)
3919 self.LogWarning("Node failed to demote itself: %s" % msg)
3921 node.offline = False
3922 result.append(("offline", "clear offline status due to drain"))
3924 # we locked all nodes, we adjust the CP before updating this node
3926 _AdjustCandidatePool(self, [node.name])
3928 # this will trigger configuration file update, if needed
3929 self.cfg.Update(node, feedback_fn)
3931 # this will trigger job queue propagation or cleanup
3933 self.context.ReaddNode(node)
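

# Illustrative note (not part of the original module): Exec above returns a
# list of (attribute, new value) pairs; offlining a node that was a master
# candidate might yield, e.g. (hypothetical output):
#
#   [("offline", "True"),
#    ("master_candidate", "auto-demotion due to offline")]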


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = [
    ("node_name", _TNEString),
    ("force", _TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload
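

# Illustrative note (not part of the original module): powercycling the
# master requires the force flag, since the node would powercycle itself;
# assuming an OpPowercycleNode-style opcode, a hypothetical call could be:
#
#   opcodes.OpPowercycleNode(node_name="node2.example.com", force=False)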


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = [("output_fields", _TListOf(_TNEString))]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values
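

# Illustrative note (not part of the original module): LUQueryConfigValues
# returns the values in the order the fields were requested, e.g.
# (hypothetical values):
#
#   output_fields=["cluster_name", "drain_flag"]
#   -> ["cluster1.example.com", False]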


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = [("instance_name", _TNEString)]
  _OP_DEFS = [("ignore_size", False)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
        _AssembleInstanceDisks(self, self.instance,
                               ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
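

# Illustrative note (not part of the original module): for a healthy DRBD
# instance with a single disk, _AssembleInstanceDisks might return
# (hypothetical values):
#
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
#
# i.e. the success flag plus one (node, iv_name, device path) triple per
# disk on the primary node.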


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = [("instance_name", _TNEString)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored (they make the function return False); if it is true,
  primary-node errors are only logged.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
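

# Illustrative note (not part of the original module): a typical caller
# checks the primary node before starting an instance, as LUStartupInstance
# does further below:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)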


def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)


class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("force", _TBool),
    ("beparams", _TDict),
    ("hvparams", _TDict),
    ]
  _OP_DEFS = [
    ("beparams", _EmptyDict),
    ("hvparams", _EmptyDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hypervisor parameters
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.op.hvparams, self.op.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)
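

# Illustrative note (not part of the original module): the hvparams and
# beparams accepted by LUStartupInstance are one-shot overrides for this
# start only; e.g. starting with extra memory might look like (hypothetical
# values, assuming an OpStartupInstance-style opcode):
#
#   opcodes.OpStartupInstance(instance_name="inst1.example.com",
#                             force=False,
#                             beparams={constants.BE_MEMORY: 2048})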


class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("ignore_secondaries", _TBool),
    ("reboot_type", _TElemOf(constants.REBOOT_TYPES)),
    ]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
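

# Illustrative note (not part of the original module): soft and hard reboots
# are delegated to the hypervisor via call_instance_reboot, while a full
# reboot is emulated above as shutdown + disk restart + start; e.g.
# (hypothetical values, assuming an OpRebootInstance-style opcode):
#
#   opcodes.OpRebootInstance(instance_name="inst1.example.com",
#                            reboot_type=constants.INSTANCE_REBOOT_FULL,
#                            ignore_secondaries=False)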


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [("instance_name", _TNEString)]
  _OP_DEFS = [("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [("instance_name", _TNEString)]
  _OP_DEFS = [
    ("os_type", None),
    ("force_variant", False),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("disks", _TListOf(_TPInt)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)

    _CreateDisks(self, self.instance, to_skip=to_skip)
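

# Illustrative note (not part of the original module): the "disks" argument
# lists disk indexes to recreate, an empty list meaning all of them; e.g.
# recreating only the second disk of an instance (hypothetical values,
# assuming an OpRecreateInstanceDisks-style opcode):
#
#   opcodes.OpRecreateInstanceDisks(instance_name="inst1.example.com",
#                                   disks=[1])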


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("new_name", _TNEString),
    ]
  _OP_DEFS = [("ignore_ip", False)]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    name_info = utils.GetHostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    if not self.op.ignore_ip:
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("ignore_failures", _TBool),
    ]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = [
    ("output_fields", _TListOf(_TNEString)),
    ("names", _TListOf(_TNEString)),
    ("use_locking", _TBool),
    ]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
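
  # Illustrative note (not part of the original module): the regular
  # expressions in _FIELDS_STATIC above match parametrized fields; e.g.
  # "disk.size/0" matches r"(disk)\.(size)/([0-9]+)" and yields the groups
  # ("disk", "size", "0"), which Exec below dispatches on.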

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based items
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output
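

# Illustrative note (not part of the original module): the result is one row
# per instance, with columns in requested order; for
# output_fields=["name", "status", "oper_ram"] the output might be
# (hypothetical values):
#
#   [["inst1.example.com", "running", 512],
#    ["inst2.example.com", "ADMIN_down", "-"]]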


class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("ignore_consistency", _TBool),
    ]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("live", _TBool),
    ("cleanup", _TBool),
    ]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post


class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("target_node", _TNEString),
    ]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 ",".join(errs))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
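

# Illustrative note (not part of the original module): as CheckPrereq above
# enforces, a move by data copy only works for plain LVM or file-based disks
# (constants.LD_LV / constants.LD_FILE); network-mirrored (DRBD) instances
# use failover or migration instead.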


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = [
    ("node_name", _TNEString),
    ("live", _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


class TLMigrateInstance(Tasklet):
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()
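
    # The disks are now connected in primary/primary ("dual-master") mode,
    # which is what allows the instance to be live on either node during
    # the handover; single-master mode is restored at the end of the
    # migration, once the instance runs only on the target node.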
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
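
# Note that _CreateBlockDev recurses into the children before creating
# the device itself, so device trees are built bottom-up: for a DRBD8
# disk, the data and meta LVs exist before the DRBD device is created
# on top of them.
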
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))

  return results
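
# For illustration: _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"])
# returns names of the form "<uuid>.disk0_data" and "<uuid>.disk0_meta",
# with a fresh unique ID generated for each extension.
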
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
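
# The resulting device tree is a DRBD8 device sitting on top of a
# full-size data LV and a fixed 128 MB metadata LV; its logical_id
# carries both node names, the allocated port, both minors and the
# shared secret used by the two drbd endpoints.
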
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)
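    # AllocateDRBDMinor is given the node list [pnode, snode, pnode,
    # snode, ...], so the returned minors are interleaved per disk:
    # minors[2 * idx] is the primary's minor for disk idx and
    # minors[2 * idx + 1] the secondary's, matching the slicing below.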
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)

  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
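
# Note: unlike _RemoveDisks below, disk creation is not best-effort; the
# first failing RPC raises via result.Raise() and the caller is expected
# to clean up any devices that were already created.
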
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
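
# Worked example: two 1024 MB disks under DT_DRBD8 require
# 2 * (1024 + 128) = 2304 MB of free space in the volume group, while
# the file and diskless templates have no VG requirement (None).
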
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("mode", _TElemOf(constants.INSTANCE_CREATE_MODES)),
    ("start", _TBool),
    ("wait_for_sync", _TBool),
    ("ip_check", _TBool),
    ("disks", _TListOf(_TDict)),
    ("nics", _TListOf(_TDict)),
    ("hvparams", _TDict),
    ("beparams", _TDict),
    ("osparams", _TDict),
    ]
  _OP_DEFS = [
    ("name_check", True),
    ("no_install", False),
    ("os_type", None),
    ("force_variant", False),
    ("source_handshake", None),
    ("source_x509_ca", None),
    ("source_instance_name", None),
    ("src_node", None),
    ("src_path", None),
    ("pnode", None),
    ("snode", None),
    ("iallocator", None),
    ("hypervisor", None),
    ("disk_template", None),
    ("identify_defaults", None),
    ("file_driver", None),
    ("file_storage_dir", None),
    ]
  REQ_BGL = False
  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template != constants.DT_PLAIN:
        raise errors.OpPrereqError("Disk adoption is only supported for the"
                                   " 'plain' disk template",
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      raise errors.OpPrereqError("Remote imports require names to be checked",
                                 errors.ECODE_INVAL)
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path must not be"
                                 " absolute", errors.ECODE_INVAL)

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given",
                                 errors.ECODE_INVAL)

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
        utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)

    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]

    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]

    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]

    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    _CheckDiskTemplate(self.op.disk_template)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                                    ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
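
    # Note: each NIC's nicparams stores only the explicitly requested
    # values; cluster.SimpleFillNIC is used above only to validate the
    # result of layering them over the cluster defaults.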
    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      new_disk = {"size": size, "mode": mode}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_INVAL)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements, if not adopting
    if req_size is not None and not self.adopt_disks:
      _CheckNodesFreeDisk(self, nodenames, req_size)

    if self.adopt_disks: # instead, we must check the adoption data
      all_lvs = set([i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       self.cfg.GetVGName())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )
    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise
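
    # Note: the instance was not added to the configuration yet when the
    # disks were created, so on failure it was enough to remove the disks
    # and give back the reserved DRBD minors before re-raising.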
    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))
      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
                                                     self.source_x509_ca,
                                                     self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = [("instance_name", _TNEString)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
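
# The LU only builds the command; the caller is expected to exec the
# returned SSH command line (as root, with a tty) on the master node to
# actually attach to the hypervisor-provided console.
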
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("mode", _TElemOf(constants.REPLACE_MODES)),
    ("disks", _TListOf(_TPInt)),
    ]
  _OP_DEFS = [
    ("remote_node", None),
    ("iallocator", None),
    ("early_release", None),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name
  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

    # If not specified all disks should be replaced
    if not self.disks:
      self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)
  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))
  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names
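
  # Illustration of the mapping returned above (hypothetical names): for a
  # replaced disk 0, iv_names would contain an entry such as
  #   "disk/0": (<drbd disk>, [old_data_lv, old_meta_lv],
  #              [new_data_lv, new_meta_lv])
  # i.e. the DRBD device together with its old and new local LV children.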

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock(s) for a given node or list of nodes."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
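
  # Sketch of the rename dance performed above, with made-up names: if the
  # old data LV is ("xenvg", "uuid.disk0_data") and time.time() is
  # 1300000000, the two rename passes produce
  #   uuid.disk0_data         -> uuid.disk0_data_replaced-1300000000
  #   <new unique>.disk0_data -> uuid.disk0_data
  # so the freshly-created LV ends up under the old name and can be
  # re-attached to the DRBD device transparently.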

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                               [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
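
  # Illustration of the logical_id rewrite above (hypothetical values): a
  # disk with logical_id
  #   ("node1", "node2", 11000, 0, 1, "secret")
  # whose secondary moves to "node3" with newly allocated minor 3 becomes
  #   ("node1", "node3", 11000, 0, 3, "secret")
  # i.e. only the secondary node and its DRBD minor change.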


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_REQP = [("node_name", _TNEString)]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
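

# A repair of this kind is normally requested from the command line, e.g.
# (hypothetical node and volume group names):
#   gnt-node repair-storage node1.example.com lvm-vg xenvg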


class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_REQP = [("nodes", _TListOf(_TNEString))]
  _OP_DEFS = [
    ("remote_node", None),
    ("iallocator", None),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    if self.op.remote_node is not None and self.op.iallocator is not None:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result
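

# The strategy computed above is a list of [instance_name, new_node] pairs;
# with hypothetical names, evacuating one node might yield e.g.
#   [["inst1.example.com", "node3.example.com"],
#    ["inst2.example.com", "node4.example.com"]]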


class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("disk", _TInt),
    ("amount", _TInt),
    ("wait_for_sync", _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested.")


class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = [
    ("instances", _TListOf(_TNEString)),
    ("static", _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result
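

# In outline, the per-instance dictionary built above looks like this
# (hypothetical values, many keys omitted):
#   {"name": "inst1.example.com", "config_state": "up", "run_state": "up",
#    "pnode": "node1.example.com", "snodes": ["node2.example.com"], ...}
# and the overall result maps each instance name to such a dictionary.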


class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [("instance_name", _TNEString)]
  _OP_DEFS = [
    ("nics", _EmptyList),
    ("disks", _EmptyList),
    ("beparams", _EmptyDict),
    ("hvparams", _EmptyDict),
    ("disk_template", None),
    ("remote_node", None),
    ("os_name", None),
    ("force_variant", False),
    ("osparams", None),
    ("force", False),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if self.op.disk_template:
      _CheckDiskTemplate(self.op.disk_template)
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
          self.op.remote_node is None):
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                   " one requires specifying a secondary node",
                                   errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        disks = [{"size": d.size} for d in instance.disks]
        required = _ComputeDiskSize(self.op.disk_template, disks)
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
        _CheckInstanceDown(self, instance, "cannot change disk template")

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_default=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_new = self.os_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    return

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
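

# _DISK_CONVERSIONS is keyed by (old_template, new_template) pairs, so Exec
# simply looks up e.g. (constants.DT_PLAIN, constants.DT_DRBD8) and calls
# the matching handler; supporting a new conversion only requires adding an
# entry to this dictionary.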


class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = [("nodes", _TListOf(_TNEString))]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUPrepareExport(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("mode", _TElemOf(constants.EXPORT_MODES)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("target_node", _TNEString),
    ("shutdown", _TBool),
    ("mode", _TElemOf(constants.EXPORT_MODES)),
    ]
  _OP_DEFS = [
    ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT),
    ("remove_instance", False),
    ("ignore_remove_failures", False),
    ("mode", constants.EXPORT_MODE_LOCAL),
    ("x509_key_name", None),
    ("destination_x509_ca", None),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.remove_instance and not self.op.shutdown:
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down first", errors.ECODE_INVAL)

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    # Remove instance if requested
    if self.op.remove_instance:
      if not (compat.all(dresults) and fin_resu):
        feedback_fn("Not removing instance %s as parts of the export failed" %
                    instance.name)
      else:
        feedback_fn("Removing instance %s" % instance.name)
        _RemoveInstance(self, feedback_fn, instance,
                        self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
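

# The (fin_resu, dresults) pair returned above combines an overall success
# flag with one boolean per disk; a fully successful export of a two-disk
# instance would return e.g. (True, [True, True]) (illustrative values).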
9453 class LURemoveExport(NoHooksLU):
9454 """Remove exports related to the named instance.
9457 _OP_REQP = [("instance_name", _TNEString)]
9460 def ExpandNames(self):
9461 self.needed_locks = {}
9462 # We need all nodes to be locked in order for RemoveExport to work, but we
9463 # don't need to lock the instance itself, as nothing will happen to it (and
9464 # we can remove exports also for a removed instance)
9465 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU):  # pylint: disable-msg=W0223
  """Tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = [
    ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _TNEString),
    ]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = [("pattern", _TNEString)]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = [
    ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _TNEString),
    ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)
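
# Illustrative usage sketch (assumption: tags are added by submitting the
# corresponding opcode; the opcode name and field values below are examples
# only, not taken from this module):
#   op = opcodes.OpAddTags(kind=constants.TAG_INSTANCE,
#                          name="inst1.example.com",
#                          tags=["env:prod"])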


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = [
    ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _TNEString),
    ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = [
    ("duration", _TFloat),
    ("on_master", _TBool),
    ("on_nodes", _TListOf(_TNEString)),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    # TODO: convert to the type system
    self.op.repeat = getattr(self.op, "repeat", 0)
    if self.op.repeat < 0:
      raise errors.OpPrereqError("Repetition count cannot be negative",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
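
# Illustrative usage (assumption: exercised via the debug CLI, which submits
# the corresponding opcode; exact flags may differ by version):
#   gnt-debug delay -n node1.example.com 5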


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)
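
  # Illustrative construction sketch (assumption: invoked from an LU, like
  # the callers later in this module; the allocator name "hail" and all
  # values are examples only):
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name="inst1.example.com",
  #                    relocate_from=["node2.example.com"])
  #   ial.Run("hail")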

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                    }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
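
  # Illustrative serialized request fragment (values made up):
  #   {"request": {"type": "allocate", "name": "inst1.example.com",
  #                "required_nodes": 2, ...}, "nodes": {...}, ...}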

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
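
  # Illustrative well-formed allocator reply (values made up):
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}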


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = [
    ("direction", _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", _TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", _TNEString),
    ("nics", _TOr(_TNone, _TListOf(
      _TDictOf(_TElemOf(["mac", "ip", "bridge"]), _TNEString)))),
    ("disks", _TOr(_TNone, _TList)),
    ]
  _OP_DEFS = [
    ("hypervisor", None),
    ("allocator", None),
    ("nics", None),
    ("disks", None),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
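
# Illustrative usage (assumption: exercised via the debug CLI, which builds
# the corresponding test opcode; exact flags may differ by version):
#   gnt-debug allocator --dir in --mode relocate inst1.example.com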