code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39
  40 from ganeti import ssh
  41 from ganeti import utils
  42 from ganeti import errors
  43 from ganeti import hypervisor
  44 from ganeti import locking
  45 from ganeti import constants
  46 from ganeti import objects
  47 from ganeti import serializer
  48 from ganeti import ssconf
  49 from ganeti import uidpool
  50 from ganeti import compat
  51 from ganeti import masterd
  52
  53 import ganeti.masterd.instance # pylint: disable-msg=W0611
  54
  55
  56 # Modifiable default values; need to define these here before the
  57 # actual LUs
  58
  59 def _EmptyList():
  60   """Returns an empty list.
  61
  62   """
  63   return []
  64
  65
  66 def _EmptyDict():
  67   """Returns an empty dict.
  68
  69   """
  70   return {}
  71
  72
  73 # Some basic types
  74 def _TNotNone(val):
  75   """Checks if the given value is not None.
  76
  77   """
  78   return val is not None
  79
  80
  81 def _TNone(val):
  82   """Checks if the given value is None.
  83
  84   """
  85   return val is None
  86
  87
  88 def _TBool(val):
  89   """Checks if the given value is a boolean.
  90
  91   """
  92   return isinstance(val, bool)
  93
  94
  95 def _TInt(val):
  96   """Checks if the given value is an integer.
  97
  98   """
  99   return isinstance(val, int)
 100
 101
 102 def _TFloat(val):
 103   """Checks if the given value is a float.
 104
 105   """
 106   return isinstance(val, float)
 107
 108
 109 def _TString(val):
 110   """Checks if the given value is a string.
 111
 112   """
 113   return isinstance(val, basestring)
 114
 115
 116 def _TTrue(val):
 117   """Checks if a given value evaluates to a boolean True value.
 118
 119   """
 120   return bool(val)
 121
 122
 123 def _TElemOf(target_list):
 124   """Builds a function that checks if a given value is a member of a list.
 125
 126   """
 127   return lambda val: val in target_list
 128
 129
 130 # Container types
 131 def _TList(val):
 132   """Checks if the given value is a list.
 133
 134   """
 135   return isinstance(val, list)
 136
 137
 138 def _TDict(val):
 139   """Checks if the given value is a dictionary.
 140
 141   """
 142   return isinstance(val, dict)
 143
 144
 145 # Combinator types
 146 def _TAnd(*args):
 147   """Combine multiple functions using an AND operation.
 148
 149   """
 150   def fn(val):
 151     return compat.all(t(val) for t in args)
 152   return fn
 153
 154
 155 def _TOr(*args):
 156   """Combine multiple functions using an AND operation.
 157
 158   """
 159   def fn(val):
 160     return compat.any(t(val) for t in args)
 161   return fn
 162
 163
 164 # Type aliases
 165
 166 # non-empty string
 167 _TNonEmptyString = _TAnd(_TString, _TTrue)
 168
 169
 170 # positive integer
 171 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
 172
 173
 174 def _TListOf(my_type):
 175   """Checks if a given value is a list with all elements of the same type.
 176
 177   """
 178   return _TAnd(_TList,
 179                lambda lst: compat.all(lst, my_type))
 180
 181
 182 def _TDictOf(key_type, val_type):
 183   """Checks a dict type for the type of its key/values.
 184
 185   """
 186   return _TAnd(_TDict,
 187                lambda my_dict: (compat.all(my_dict.keys(), key_type) and
 188                                 compat.all(my_dict.values(), val_type)))
 189
 190
 191 # End types
 192 class LogicalUnit(object):
 193   """Logical Unit base class.
 194
 195   Subclasses must follow these rules:
 196     - implement ExpandNames
 197     - implement CheckPrereq (except when tasklets are used)
 198     - implement Exec (except when tasklets are used)
 199     - implement BuildHooksEnv
 200     - redefine HPATH and HTYPE
 201     - optionally redefine their run requirements:
 202         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 203
 204   Note that all commands require root permissions.
 205
 206   @ivar dry_run_result: the value (if any) that will be returned to the caller
 207       in dry-run mode (signalled by opcode dry_run parameter)
 208   @cvar _OP_DEFS: a list of opcode attributes and the defaults values
 209       they should get if not already existing
 210
 211   """
 212   HPATH = None
 213   HTYPE = None
 214   _OP_REQP = []
 215   _OP_DEFS = []
 216   REQ_BGL = True
 217
 218   def __init__(self, processor, op, context, rpc):
 219     """Constructor for LogicalUnit.
 220
 221     This needs to be overridden in derived classes in order to check op
 222     validity.
 223
 224     """
 225     self.proc = processor
 226     self.op = op
 227     self.cfg = context.cfg
 228     self.context = context
 229     self.rpc = rpc
 230     # Dicts used to declare locking needs to mcpu
 231     self.needed_locks = None
 232     self.acquired_locks = {}
 233     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 234     self.add_locks = {}
 235     self.remove_locks = {}
 236     # Used to force good behavior when calling helper functions
 237     self.recalculate_locks = {}
 238     self.__ssh = None
 239     # logging
 240     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 241     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 242     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 243     # support for dry-run
 244     self.dry_run_result = None
 245     # support for generic debug attribute
 246     if (not hasattr(self.op, "debug_level") or
 247         not isinstance(self.op.debug_level, int)):
 248       self.op.debug_level = 0
 249
 250     # Tasklets
 251     self.tasklets = None
 252
 253     for aname, aval in self._OP_DEFS:
 254       if not hasattr(self.op, aname):
 255         if callable(aval):
 256           dval = aval()
 257         else:
 258           dval = aval
 259         setattr(self.op, aname, dval)
 260
 261     for attr_name, test in self._OP_REQP:
 262       if not hasattr(op, attr_name):
 263         raise errors.OpPrereqError("Required parameter '%s' missing" %
 264                                    attr_name, errors.ECODE_INVAL)
 265       attr_val = getattr(op, attr_name, None)
 266       if not callable(test):
 267         raise errors.ProgrammerError("Validation for parameter '%s' failed,"
 268                                      " given type is not a proper type (%s)" %
 269                                      (attr_name, test))
 270       if not test(attr_val):
 271         raise errors.OpPrereqError("Parameter '%s' has invalid type" %
 272                                    attr_name, errors.ECODE_INVAL)
 273
 274     self.CheckArguments()
 275
 276   def __GetSSH(self):
 277     """Returns the SshRunner object
 278
 279     """
 280     if not self.__ssh:
 281       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 282     return self.__ssh
 283
 284   ssh = property(fget=__GetSSH)
 285
 286   def CheckArguments(self):
 287     """Check syntactic validity for the opcode arguments.
 288
 289     This method is for doing a simple syntactic check and ensure
 290     validity of opcode parameters, without any cluster-related
 291     checks. While the same can be accomplished in ExpandNames and/or
 292     CheckPrereq, doing these separate is better because:
 293
 294       - ExpandNames is left as as purely a lock-related function
 295       - CheckPrereq is run after we have acquired locks (and possible
 296         waited for them)
 297
 298     The function is allowed to change the self.op attribute so that
 299     later methods can no longer worry about missing parameters.
 300
 301     """
 302     pass
 303
 304   def ExpandNames(self):
 305     """Expand names for this LU.
 306
 307     This method is called before starting to execute the opcode, and it should
 308     update all the parameters of the opcode to their canonical form (e.g. a
 309     short node name must be fully expanded after this method has successfully
 310     completed). This way locking, hooks, logging, ecc. can work correctly.
 311
 312     LUs which implement this method must also populate the self.needed_locks
 313     member, as a dict with lock levels as keys, and a list of needed lock names
 314     as values. Rules:
 315
 316       - use an empty dict if you don't need any lock
 317       - if you don't need any lock at a particular level omit that level
 318       - don't put anything for the BGL level
 319       - if you want all locks at a level use locking.ALL_SET as a value
 320
 321     If you need to share locks (rather than acquire them exclusively) at one
 322     level you can modify self.share_locks, setting a true value (usually 1) for
 323     that level. By default locks are not shared.
 324
 325     This function can also define a list of tasklets, which then will be
 326     executed in order instead of the usual LU-level CheckPrereq and Exec
 327     functions, if those are not defined by the LU.
 328
 329     Examples::
 330
 331       # Acquire all nodes and one instance
 332       self.needed_locks = {
 333         locking.LEVEL_NODE: locking.ALL_SET,
 334         locking.LEVEL_INSTANCE: ['instance1.example.tld'],
 335       }
 336       # Acquire just two nodes
 337       self.needed_locks = {
 338         locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
 339       }
 340       # Acquire no locks
 341       self.needed_locks = {} # No, you can't leave it to the default value None
 342
 343     """
 344     # The implementation of this method is mandatory only if the new LU is
 345     # concurrent, so that old LUs don't need to be changed all at the same
 346     # time.
 347     if self.REQ_BGL:
 348       self.needed_locks = {} # Exclusive LUs don't need locks.
 349     else:
 350       raise NotImplementedError
 351
 352   def DeclareLocks(self, level):
 353     """Declare LU locking needs for a level
 354
 355     While most LUs can just declare their locking needs at ExpandNames time,
 356     sometimes there's the need to calculate some locks after having acquired
 357     the ones before. This function is called just before acquiring locks at a
 358     particular level, but after acquiring the ones at lower levels, and permits
 359     such calculations. It can be used to modify self.needed_locks, and by
 360     default it does nothing.
 361
 362     This function is only called if you have something already set in
 363     self.needed_locks for the level.
 364
 365     @param level: Locking level which is going to be locked
 366     @type level: member of ganeti.locking.LEVELS
 367
 368     """
 369
 370   def CheckPrereq(self):
 371     """Check prerequisites for this LU.
 372
 373     This method should check that the prerequisites for the execution
 374     of this LU are fulfilled. It can do internode communication, but
 375     it should be idempotent - no cluster or system changes are
 376     allowed.
 377
 378     The method should raise errors.OpPrereqError in case something is
 379     not fulfilled. Its return value is ignored.
 380
 381     This method should also update all the parameters of the opcode to
 382     their canonical form if it hasn't been done by ExpandNames before.
 383
 384     """
 385     if self.tasklets is not None:
 386       for (idx, tl) in enumerate(self.tasklets):
 387         logging.debug("Checking prerequisites for tasklet %s/%s",
 388                       idx + 1, len(self.tasklets))
 389         tl.CheckPrereq()
 390     else:
 391       pass
 392
 393   def Exec(self, feedback_fn):
 394     """Execute the LU.
 395
 396     This method should implement the actual work. It should raise
 397     errors.OpExecError for failures that are somewhat dealt with in
 398     code, or expected.
 399
 400     """
 401     if self.tasklets is not None:
 402       for (idx, tl) in enumerate(self.tasklets):
 403         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 404         tl.Exec(feedback_fn)
 405     else:
 406       raise NotImplementedError
 407
 408   def BuildHooksEnv(self):
 409     """Build hooks environment for this LU.
 410
 411     This method should return a three-node tuple consisting of: a dict
 412     containing the environment that will be used for running the
 413     specific hook for this LU, a list of node names on which the hook
 414     should run before the execution, and a list of node names on which
 415     the hook should run after the execution.
 416
 417     The keys of the dict must not have 'GANETI_' prefixed as this will
 418     be handled in the hooks runner. Also note additional keys will be
 419     added by the hooks runner. If the LU doesn't define any
 420     environment, an empty dict (and not None) should be returned.
 421
 422     No nodes should be returned as an empty list (and not None).
 423
 424     Note that if the HPATH for a LU class is None, this function will
 425     not be called.
 426
 427     """
 428     raise NotImplementedError
 429
 430   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 431     """Notify the LU about the results of its hooks.
 432
 433     This method is called every time a hooks phase is executed, and notifies
 434     the Logical Unit about the hooks' result. The LU can then use it to alter
 435     its result based on the hooks.  By default the method does nothing and the
 436     previous result is passed back unchanged but any LU can define it if it
 437     wants to use the local cluster hook-scripts somehow.
 438
 439     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 440         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 441     @param hook_results: the results of the multi-node hooks rpc call
 442     @param feedback_fn: function used send feedback back to the caller
 443     @param lu_result: the previous Exec result this LU had, or None
 444         in the PRE phase
 445     @return: the new Exec result, based on the previous result
 446         and hook results
 447
 448     """
 449     # API must be kept, thus we ignore the unused argument and could
 450     # be a function warnings
 451     # pylint: disable-msg=W0613,R0201
 452     return lu_result
 453
 454   def _ExpandAndLockInstance(self):
 455     """Helper function to expand and lock an instance.
 456
 457     Many LUs that work on an instance take its name in self.op.instance_name
 458     and need to expand it and then declare the expanded name for locking. This
 459     function does it, and then updates self.op.instance_name to the expanded
 460     name. It also initializes needed_locks as a dict, if this hasn't been done
 461     before.
 462
 463     """
 464     if self.needed_locks is None:
 465       self.needed_locks = {}
 466     else:
 467       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 468         "_ExpandAndLockInstance called with instance-level locks set"
 469     self.op.instance_name = _ExpandInstanceName(self.cfg,
 470                                                 self.op.instance_name)
 471     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 472
 473   def _LockInstancesNodes(self, primary_only=False):
 474     """Helper function to declare instances' nodes for locking.
 475
 476     This function should be called after locking one or more instances to lock
 477     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 478     with all primary or secondary nodes for instances already locked and
 479     present in self.needed_locks[locking.LEVEL_INSTANCE].
 480
 481     It should be called from DeclareLocks, and for safety only works if
 482     self.recalculate_locks[locking.LEVEL_NODE] is set.
 483
 484     In the future it may grow parameters to just lock some instance's nodes, or
 485     to just lock primaries or secondary nodes, if needed.
 486
 487     If should be called in DeclareLocks in a way similar to::
 488
 489       if level == locking.LEVEL_NODE:
 490         self._LockInstancesNodes()
 491
 492     @type primary_only: boolean
 493     @param primary_only: only lock primary nodes of locked instances
 494
 495     """
 496     assert locking.LEVEL_NODE in self.recalculate_locks, \
 497       "_LockInstancesNodes helper function called with no nodes to recalculate"
 498
 499     # TODO: check if we're really been called with the instance locks held
 500
 501     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 502     # future we might want to have different behaviors depending on the value
 503     # of self.recalculate_locks[locking.LEVEL_NODE]
 504     wanted_nodes = []
 505     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 506       instance = self.context.cfg.GetInstanceInfo(instance_name)
 507       wanted_nodes.append(instance.primary_node)
 508       if not primary_only:
 509         wanted_nodes.extend(instance.secondary_nodes)
 510
 511     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 512       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 513     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 514       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 515
 516     del self.recalculate_locks[locking.LEVEL_NODE]
 517
 518
 519 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 520   """Simple LU which runs no hooks.
 521
 522   This LU is intended as a parent for other LogicalUnits which will
 523   run no hooks, in order to reduce duplicate code.
 524
 525   """
 526   HPATH = None
 527   HTYPE = None
 528
 529   def BuildHooksEnv(self):
 530     """Empty BuildHooksEnv for NoHooksLu.
 531
 532     This just raises an error.
 533
 534     """
 535     assert False, "BuildHooksEnv called for NoHooksLUs"
 536
 537
 538 class Tasklet:
 539   """Tasklet base class.
 540
 541   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 542   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 543   tasklets know nothing about locks.
 544
 545   Subclasses must follow these rules:
 546     - Implement CheckPrereq
 547     - Implement Exec
 548
 549   """
 550   def __init__(self, lu):
 551     self.lu = lu
 552
 553     # Shortcuts
 554     self.cfg = lu.cfg
 555     self.rpc = lu.rpc
 556
 557   def CheckPrereq(self):
 558     """Check prerequisites for this tasklets.
 559
 560     This method should check whether the prerequisites for the execution of
 561     this tasklet are fulfilled. It can do internode communication, but it
 562     should be idempotent - no cluster or system changes are allowed.
 563
 564     The method should raise errors.OpPrereqError in case something is not
 565     fulfilled. Its return value is ignored.
 566
 567     This method should also update all parameters to their canonical form if it
 568     hasn't been done before.
 569
 570     """
 571     pass
 572
 573   def Exec(self, feedback_fn):
 574     """Execute the tasklet.
 575
 576     This method should implement the actual work. It should raise
 577     errors.OpExecError for failures that are somewhat dealt with in code, or
 578     expected.
 579
 580     """
 581     raise NotImplementedError
 582
 583
 584 def _GetWantedNodes(lu, nodes):
 585   """Returns list of checked and expanded node names.
 586
 587   @type lu: L{LogicalUnit}
 588   @param lu: the logical unit on whose behalf we execute
 589   @type nodes: list
 590   @param nodes: list of node names or None for all nodes
 591   @rtype: list
 592   @return: the list of nodes, sorted
 593   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 594
 595   """
 596   if not nodes:
 597     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 598       " non-empty list of nodes whose name is to be expanded.")
 599
 600   wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
 601   return utils.NiceSort(wanted)
 602
 603
 604 def _GetWantedInstances(lu, instances):
 605   """Returns list of checked and expanded instance names.
 606
 607   @type lu: L{LogicalUnit}
 608   @param lu: the logical unit on whose behalf we execute
 609   @type instances: list
 610   @param instances: list of instance names or None for all instances
 611   @rtype: list
 612   @return: the list of instances, sorted
 613   @raise errors.OpPrereqError: if the instances parameter is wrong type
 614   @raise errors.OpPrereqError: if any of the passed instances is not found
 615
 616   """
 617   if instances:
 618     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 619   else:
 620     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 621   return wanted
 622
 623
 624 def _GetUpdatedParams(old_params, update_dict,
 625                       use_default=True, use_none=False):
 626   """Return the new version of a parameter dictionary.
 627
 628   @type old_params: dict
 629   @param old_params: old parameters
 630   @type update_dict: dict
 631   @param update_dict: dict containing new parameter values, or
 632       constants.VALUE_DEFAULT to reset the parameter to its default
 633       value
 634   @param use_default: boolean
 635   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 636       values as 'to be deleted' values
 637   @param use_none: boolean
 638   @type use_none: whether to recognise C{None} values as 'to be
 639       deleted' values
 640   @rtype: dict
 641   @return: the new parameter dictionary
 642
 643   """
 644   params_copy = copy.deepcopy(old_params)
 645   for key, val in update_dict.iteritems():
 646     if ((use_default and val == constants.VALUE_DEFAULT) or
 647         (use_none and val is None)):
 648       try:
 649         del params_copy[key]
 650       except KeyError:
 651         pass
 652     else:
 653       params_copy[key] = val
 654   return params_copy
 655
 656
 657 def _CheckOutputFields(static, dynamic, selected):
 658   """Checks whether all selected fields are valid.
 659
 660   @type static: L{utils.FieldSet}
 661   @param static: static fields set
 662   @type dynamic: L{utils.FieldSet}
 663   @param dynamic: dynamic fields set
 664
 665   """
 666   f = utils.FieldSet()
 667   f.Extend(static)
 668   f.Extend(dynamic)
 669
 670   delta = f.NonMatching(selected)
 671   if delta:
 672     raise errors.OpPrereqError("Unknown output fields selected: %s"
 673                                % ",".join(delta), errors.ECODE_INVAL)
 674
 675
 676 def _CheckBooleanOpField(op, name):
 677   """Validates boolean opcode parameters.
 678
 679   This will ensure that an opcode parameter is either a boolean value,
 680   or None (but that it always exists).
 681
 682   """
 683   val = getattr(op, name, None)
 684   if not (val is None or isinstance(val, bool)):
 685     raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
 686                                (name, str(val)), errors.ECODE_INVAL)
 687   setattr(op, name, val)
 688
 689
 690 def _CheckGlobalHvParams(params):
 691   """Validates that given hypervisor params are not global ones.
 692
 693   This will ensure that instances don't get customised versions of
 694   global params.
 695
 696   """
 697   used_globals = constants.HVC_GLOBALS.intersection(params)
 698   if used_globals:
 699     msg = ("The following hypervisor parameters are global and cannot"
 700            " be customized at instance level, please modify them at"
 701            " cluster level: %s" % utils.CommaJoin(used_globals))
 702     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 703
 704
 705 def _CheckNodeOnline(lu, node):
 706   """Ensure that a given node is online.
 707
 708   @param lu: the LU on behalf of which we make the check
 709   @param node: the node to check
 710   @raise errors.OpPrereqError: if the node is offline
 711
 712   """
 713   if lu.cfg.GetNodeInfo(node).offline:
 714     raise errors.OpPrereqError("Can't use offline node %s" % node,
 715                                errors.ECODE_INVAL)
 716
 717
 718 def _CheckNodeNotDrained(lu, node):
 719   """Ensure that a given node is not drained.
 720
 721   @param lu: the LU on behalf of which we make the check
 722   @param node: the node to check
 723   @raise errors.OpPrereqError: if the node is drained
 724
 725   """
 726   if lu.cfg.GetNodeInfo(node).drained:
 727     raise errors.OpPrereqError("Can't use drained node %s" % node,
 728                                errors.ECODE_INVAL)
 729
 730
 731 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 732   """Ensure that a node supports a given OS.
 733
 734   @param lu: the LU on behalf of which we make the check
 735   @param node: the node to check
 736   @param os_name: the OS to query about
 737   @param force_variant: whether to ignore variant errors
 738   @raise errors.OpPrereqError: if the node is not supporting the OS
 739
 740   """
 741   result = lu.rpc.call_os_get(node, os_name)
 742   result.Raise("OS '%s' not in supported OS list for node %s" %
 743                (os_name, node),
 744                prereq=True, ecode=errors.ECODE_INVAL)
 745   if not force_variant:
 746     _CheckOSVariant(result.payload, os_name)
 747
 748
 749 def _RequireFileStorage():
 750   """Checks that file storage is enabled.
 751
 752   @raise errors.OpPrereqError: when file storage is disabled
 753
 754   """
 755   if not constants.ENABLE_FILE_STORAGE:
 756     raise errors.OpPrereqError("File storage disabled at configure time",
 757                                errors.ECODE_INVAL)
 758
 759
 760 def _CheckDiskTemplate(template):
 761   """Ensure a given disk template is valid.
 762
 763   """
 764   if template not in constants.DISK_TEMPLATES:
 765     msg = ("Invalid disk template name '%s', valid templates are: %s" %
 766            (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
 767     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 768   if template == constants.DT_FILE:
 769     _RequireFileStorage()
 770
 771
 772 def _CheckStorageType(storage_type):
 773   """Ensure a given storage type is valid.
 774
 775   """
 776   if storage_type not in constants.VALID_STORAGE_TYPES:
 777     raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
 778                                errors.ECODE_INVAL)
 779   if storage_type == constants.ST_FILE:
 780     _RequireFileStorage()
 781   return True
 782
 783
 784 def _GetClusterDomainSecret():
 785   """Reads the cluster domain secret.
 786
 787   """
 788   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 789                                strict=True)
 790
 791
 792 def _CheckInstanceDown(lu, instance, reason):
 793   """Ensure that an instance is not running."""
 794   if instance.admin_up:
 795     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 796                                (instance.name, reason), errors.ECODE_STATE)
 797
 798   pnode = instance.primary_node
 799   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 800   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 801               prereq=True, ecode=errors.ECODE_ENVIRON)
 802
 803   if instance.name in ins_l.payload:
 804     raise errors.OpPrereqError("Instance %s is running, %s" %
 805                                (instance.name, reason), errors.ECODE_STATE)
 806
 807
 808 def _ExpandItemName(fn, name, kind):
 809   """Expand an item name.
 810
 811   @param fn: the function to use for expansion
 812   @param name: requested item name
 813   @param kind: text description ('Node' or 'Instance')
 814   @return: the resolved (full) name
 815   @raise errors.OpPrereqError: if the item is not found
 816
 817   """
 818   full_name = fn(name)
 819   if full_name is None:
 820     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 821                                errors.ECODE_NOENT)
 822   return full_name
 823
 824
 825 def _ExpandNodeName(cfg, name):
 826   """Wrapper over L{_ExpandItemName} for nodes."""
 827   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 828
 829
 830 def _ExpandInstanceName(cfg, name):
 831   """Wrapper over L{_ExpandItemName} for instance."""
 832   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 833
 834
 835 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 836                           memory, vcpus, nics, disk_template, disks,
 837                           bep, hvp, hypervisor_name):
 838   """Builds instance related env variables for hooks
 839
 840   This builds the hook environment from individual variables.
 841
 842   @type name: string
 843   @param name: the name of the instance
 844   @type primary_node: string
 845   @param primary_node: the name of the instance's primary node
 846   @type secondary_nodes: list
 847   @param secondary_nodes: list of secondary nodes as strings
 848   @type os_type: string
 849   @param os_type: the name of the instance's OS
 850   @type status: boolean
 851   @param status: the should_run status of the instance
 852   @type memory: string
 853   @param memory: the memory size of the instance
 854   @type vcpus: string
 855   @param vcpus: the count of VCPUs the instance has
 856   @type nics: list
 857   @param nics: list of tuples (ip, mac, mode, link) representing
 858       the NICs the instance has
 859   @type disk_template: string
 860   @param disk_template: the disk template of the instance
 861   @type disks: list
 862   @param disks: the list of (size, mode) pairs
 863   @type bep: dict
 864   @param bep: the backend parameters for the instance
 865   @type hvp: dict
 866   @param hvp: the hypervisor parameters for the instance
 867   @type hypervisor_name: string
 868   @param hypervisor_name: the hypervisor for the instance
 869   @rtype: dict
 870   @return: the hook environment for this instance
 871
 872   """
 873   if status:
 874     str_status = "up"
 875   else:
 876     str_status = "down"
 877   env = {
 878     "OP_TARGET": name,
 879     "INSTANCE_NAME": name,
 880     "INSTANCE_PRIMARY": primary_node,
 881     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 882     "INSTANCE_OS_TYPE": os_type,
 883     "INSTANCE_STATUS": str_status,
 884     "INSTANCE_MEMORY": memory,
 885     "INSTANCE_VCPUS": vcpus,
 886     "INSTANCE_DISK_TEMPLATE": disk_template,
 887     "INSTANCE_HYPERVISOR": hypervisor_name,
 888   }
 889
 890   if nics:
 891     nic_count = len(nics)
 892     for idx, (ip, mac, mode, link) in enumerate(nics):
 893       if ip is None:
 894         ip = ""
 895       env["INSTANCE_NIC%d_IP" % idx] = ip
 896       env["INSTANCE_NIC%d_MAC" % idx] = mac
 897       env["INSTANCE_NIC%d_MODE" % idx] = mode
 898       env["INSTANCE_NIC%d_LINK" % idx] = link
 899       if mode == constants.NIC_MODE_BRIDGED:
 900         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 901   else:
 902     nic_count = 0
 903
 904   env["INSTANCE_NIC_COUNT"] = nic_count
 905
 906   if disks:
 907     disk_count = len(disks)
 908     for idx, (size, mode) in enumerate(disks):
 909       env["INSTANCE_DISK%d_SIZE" % idx] = size
 910       env["INSTANCE_DISK%d_MODE" % idx] = mode
 911   else:
 912     disk_count = 0
 913
 914   env["INSTANCE_DISK_COUNT"] = disk_count
 915
 916   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 917     for key, value in source.items():
 918       env["INSTANCE_%s_%s" % (kind, key)] = value
 919
 920   return env
 921
 922
 923 def _NICListToTuple(lu, nics):
 924   """Build a list of nic information tuples.
 925
 926   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 927   value in LUQueryInstanceData.
 928
 929   @type lu:  L{LogicalUnit}
 930   @param lu: the logical unit on whose behalf we execute
 931   @type nics: list of L{objects.NIC}
 932   @param nics: list of nics to convert to hooks tuples
 933
 934   """
 935   hooks_nics = []
 936   cluster = lu.cfg.GetClusterInfo()
 937   for nic in nics:
 938     ip = nic.ip
 939     mac = nic.mac
 940     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 941     mode = filled_params[constants.NIC_MODE]
 942     link = filled_params[constants.NIC_LINK]
 943     hooks_nics.append((ip, mac, mode, link))
 944   return hooks_nics
 945
 946
 947 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 948   """Builds instance related env variables for hooks from an object.
 949
 950   @type lu: L{LogicalUnit}
 951   @param lu: the logical unit on whose behalf we execute
 952   @type instance: L{objects.Instance}
 953   @param instance: the instance for which we should build the
 954       environment
 955   @type override: dict
 956   @param override: dictionary with key/values that will override
 957       our values
 958   @rtype: dict
 959   @return: the hook environment dictionary
 960
 961   """
 962   cluster = lu.cfg.GetClusterInfo()
 963   bep = cluster.FillBE(instance)
 964   hvp = cluster.FillHV(instance)
 965   args = {
 966     'name': instance.name,
 967     'primary_node': instance.primary_node,
 968     'secondary_nodes': instance.secondary_nodes,
 969     'os_type': instance.os,
 970     'status': instance.admin_up,
 971     'memory': bep[constants.BE_MEMORY],
 972     'vcpus': bep[constants.BE_VCPUS],
 973     'nics': _NICListToTuple(lu, instance.nics),
 974     'disk_template': instance.disk_template,
 975     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 976     'bep': bep,
 977     'hvp': hvp,
 978     'hypervisor_name': instance.hypervisor,
 979   }
 980   if override:
 981     args.update(override)
 982   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 983
 984
 985 def _AdjustCandidatePool(lu, exceptions):
 986   """Adjust the candidate pool after node operations.
 987
 988   """
 989   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 990   if mod_list:
 991     lu.LogInfo("Promoted nodes to master candidate role: %s",
 992                utils.CommaJoin(node.name for node in mod_list))
 993     for name in mod_list:
 994       lu.context.ReaddNode(name)
 995   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 996   if mc_now > mc_max:
 997     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 998                (mc_now, mc_max))
 999
1000
1001 def _DecideSelfPromotion(lu, exceptions=None):
1002   """Decide whether I should promote myself as a master candidate.
1003
1004   """
1005   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1006   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1007   # the new node will increase mc_max with one, so:
1008   mc_should = min(mc_should + 1, cp_size)
1009   return mc_now < mc_should
1010
1011
1012 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1013   """Check that the brigdes needed by a list of nics exist.
1014
1015   """
1016   cluster = lu.cfg.GetClusterInfo()
1017   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1018   brlist = [params[constants.NIC_LINK] for params in paramslist
1019             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1020   if brlist:
1021     result = lu.rpc.call_bridges_exist(target_node, brlist)
1022     result.Raise("Error checking bridges on destination node '%s'" %
1023                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1024
1025
1026 def _CheckInstanceBridgesExist(lu, instance, node=None):
1027   """Check that the brigdes needed by an instance exist.
1028
1029   """
1030   if node is None:
1031     node = instance.primary_node
1032   _CheckNicsBridgesExist(lu, instance.nics, node)
1033
1034
1035 def _CheckOSVariant(os_obj, name):
1036   """Check whether an OS name conforms to the os variants specification.
1037
1038   @type os_obj: L{objects.OS}
1039   @param os_obj: OS object to check
1040   @type name: string
1041   @param name: OS name passed by the user, to check for validity
1042
1043   """
1044   if not os_obj.supported_variants:
1045     return
1046   try:
1047     variant = name.split("+", 1)[1]
1048   except IndexError:
1049     raise errors.OpPrereqError("OS name must include a variant",
1050                                errors.ECODE_INVAL)
1051
1052   if variant not in os_obj.supported_variants:
1053     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1054
1055
1056 def _GetNodeInstancesInner(cfg, fn):
1057   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1058
1059
1060 def _GetNodeInstances(cfg, node_name):
1061   """Returns a list of all primary and secondary instances on a node.
1062
1063   """
1064
1065   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1066
1067
1068 def _GetNodePrimaryInstances(cfg, node_name):
1069   """Returns primary instances on a node.
1070
1071   """
1072   return _GetNodeInstancesInner(cfg,
1073                                 lambda inst: node_name == inst.primary_node)
1074
1075
1076 def _GetNodeSecondaryInstances(cfg, node_name):
1077   """Returns secondary instances on a node.
1078
1079   """
1080   return _GetNodeInstancesInner(cfg,
1081                                 lambda inst: node_name in inst.secondary_nodes)
1082
1083
1084 def _GetStorageTypeArgs(cfg, storage_type):
1085   """Returns the arguments for a storage type.
1086
1087   """
1088   # Special case for file storage
1089   if storage_type == constants.ST_FILE:
1090     # storage.FileStorage wants a list of storage directories
1091     return [[cfg.GetFileStorageDir()]]
1092
1093   return []
1094
1095
1096 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1097   faulty = []
1098
1099   for dev in instance.disks:
1100     cfg.SetDiskID(dev, node_name)
1101
1102   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1103   result.Raise("Failed to get disk status from node %s" % node_name,
1104                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1105
1106   for idx, bdev_status in enumerate(result.payload):
1107     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1108       faulty.append(idx)
1109
1110   return faulty
1111
1112
1113 class LUPostInitCluster(LogicalUnit):
1114   """Logical unit for running hooks after cluster initialization.
1115
1116   """
1117   HPATH = "cluster-init"
1118   HTYPE = constants.HTYPE_CLUSTER
1119   _OP_REQP = []
1120
1121   def BuildHooksEnv(self):
1122     """Build hooks env.
1123
1124     """
1125     env = {"OP_TARGET": self.cfg.GetClusterName()}
1126     mn = self.cfg.GetMasterNode()
1127     return env, [], [mn]
1128
1129   def Exec(self, feedback_fn):
1130     """Nothing to do.
1131
1132     """
1133     return True
1134
1135
1136 class LUDestroyCluster(LogicalUnit):
1137   """Logical unit for destroying the cluster.
1138
1139   """
1140   HPATH = "cluster-destroy"
1141   HTYPE = constants.HTYPE_CLUSTER
1142   _OP_REQP = []
1143
1144   def BuildHooksEnv(self):
1145     """Build hooks env.
1146
1147     """
1148     env = {"OP_TARGET": self.cfg.GetClusterName()}
1149     return env, [], []
1150
1151   def CheckPrereq(self):
1152     """Check prerequisites.
1153
1154     This checks whether the cluster is empty.
1155
1156     Any errors are signaled by raising errors.OpPrereqError.
1157
1158     """
1159     master = self.cfg.GetMasterNode()
1160
1161     nodelist = self.cfg.GetNodeList()
1162     if len(nodelist) != 1 or nodelist[0] != master:
1163       raise errors.OpPrereqError("There are still %d node(s) in"
1164                                  " this cluster." % (len(nodelist) - 1),
1165                                  errors.ECODE_INVAL)
1166     instancelist = self.cfg.GetInstanceList()
1167     if instancelist:
1168       raise errors.OpPrereqError("There are still %d instance(s) in"
1169                                  " this cluster." % len(instancelist),
1170                                  errors.ECODE_INVAL)
1171
1172   def Exec(self, feedback_fn):
1173     """Destroys the cluster.
1174
1175     """
1176     master = self.cfg.GetMasterNode()
1177     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1178
1179     # Run post hooks on master node before it's removed
1180     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1181     try:
1182       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1183     except:
1184       # pylint: disable-msg=W0702
1185       self.LogWarning("Errors occurred running hooks on %s" % master)
1186
1187     result = self.rpc.call_node_stop_master(master, False)
1188     result.Raise("Could not disable the master role")
1189
1190     if modify_ssh_setup:
1191       priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1192       utils.CreateBackup(priv_key)
1193       utils.CreateBackup(pub_key)
1194
1195     return master
1196
1197
1198 def _VerifyCertificate(filename):
1199   """Verifies a certificate for LUVerifyCluster.
1200
1201   @type filename: string
1202   @param filename: Path to PEM file
1203
1204   """
1205   try:
1206     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1207                                            utils.ReadFile(filename))
1208   except Exception, err: # pylint: disable-msg=W0703
1209     return (LUVerifyCluster.ETYPE_ERROR,
1210             "Failed to load X509 certificate %s: %s" % (filename, err))
1211
1212   (errcode, msg) = \
1213     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1214                                 constants.SSL_CERT_EXPIRATION_ERROR)
1215
1216   if msg:
1217     fnamemsg = "While verifying %s: %s" % (filename, msg)
1218   else:
1219     fnamemsg = None
1220
1221   if errcode is None:
1222     return (None, fnamemsg)
1223   elif errcode == utils.CERT_WARNING:
1224     return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1225   elif errcode == utils.CERT_ERROR:
1226     return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1227
1228   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1229
1230
1231 class LUVerifyCluster(LogicalUnit):
1232   """Verifies the cluster status.
1233
1234   """
1235   HPATH = "cluster-verify"
1236   HTYPE = constants.HTYPE_CLUSTER
1237   _OP_REQP = [
1238     ("skip_checks", _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1239     ("verbose", _TBool),
1240     ("error_codes", _TBool),
1241     ("debug_simulate_errors", _TBool),
1242     ]
1243   REQ_BGL = False
1244
1245   TCLUSTER = "cluster"
1246   TNODE = "node"
1247   TINSTANCE = "instance"
1248
1249   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1250   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1251   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1252   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1253   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1254   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1255   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1256   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1257   ENODEDRBD = (TNODE, "ENODEDRBD")
1258   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1259   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1260   ENODEHV = (TNODE, "ENODEHV")
1261   ENODELVM = (TNODE, "ENODELVM")
1262   ENODEN1 = (TNODE, "ENODEN1")
1263   ENODENET = (TNODE, "ENODENET")
1264   ENODEOS = (TNODE, "ENODEOS")
1265   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1266   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1267   ENODERPC = (TNODE, "ENODERPC")
1268   ENODESSH = (TNODE, "ENODESSH")
1269   ENODEVERSION = (TNODE, "ENODEVERSION")
1270   ENODESETUP = (TNODE, "ENODESETUP")
1271   ENODETIME = (TNODE, "ENODETIME")
1272
1273   ETYPE_FIELD = "code"
1274   ETYPE_ERROR = "ERROR"
1275   ETYPE_WARNING = "WARNING"
1276
1277   class NodeImage(object):
1278     """A class representing the logical and physical status of a node.
1279
1280     @type name: string
1281     @ivar name: the node name to which this object refers
1282     @ivar volumes: a structure as returned from
1283         L{ganeti.backend.GetVolumeList} (runtime)
1284     @ivar instances: a list of running instances (runtime)
1285     @ivar pinst: list of configured primary instances (config)
1286     @ivar sinst: list of configured secondary instances (config)
1287     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1288         of this node (config)
1289     @ivar mfree: free memory, as reported by hypervisor (runtime)
1290     @ivar dfree: free disk, as reported by the node (runtime)
1291     @ivar offline: the offline status (config)
1292     @type rpc_fail: boolean
1293     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1294         not whether the individual keys were correct) (runtime)
1295     @type lvm_fail: boolean
1296     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1297     @type hyp_fail: boolean
1298     @ivar hyp_fail: whether the RPC call didn't return the instance list
1299     @type ghost: boolean
1300     @ivar ghost: whether this is a known node or not (config)
1301     @type os_fail: boolean
1302     @ivar os_fail: whether the RPC call didn't return valid OS data
1303     @type oslist: list
1304     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1305
1306     """
1307     def __init__(self, offline=False, name=None):
1308       self.name = name
1309       self.volumes = {}
1310       self.instances = []
1311       self.pinst = []
1312       self.sinst = []
1313       self.sbp = {}
1314       self.mfree = 0
1315       self.dfree = 0
1316       self.offline = offline
1317       self.rpc_fail = False
1318       self.lvm_fail = False
1319       self.hyp_fail = False
1320       self.ghost = False
1321       self.os_fail = False
1322       self.oslist = {}
1323
1324   def ExpandNames(self):
1325     self.needed_locks = {
1326       locking.LEVEL_NODE: locking.ALL_SET,
1327       locking.LEVEL_INSTANCE: locking.ALL_SET,
1328     }
1329     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1330
1331   def _Error(self, ecode, item, msg, *args, **kwargs):
1332     """Format an error message.
1333
1334     Based on the opcode's error_codes parameter, either format a
1335     parseable error code, or a simpler error string.
1336
1337     This must be called only from Exec and functions called from Exec.
1338
1339     """
1340     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1341     itype, etxt = ecode
1342     # first complete the msg
1343     if args:
1344       msg = msg % args
1345     # then format the whole message
1346     if self.op.error_codes:
1347       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1348     else:
1349       if item:
1350         item = " " + item
1351       else:
1352         item = ""
1353       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1354     # and finally report it via the feedback_fn
1355     self._feedback_fn("  - %s" % msg)
1356
1357   def _ErrorIf(self, cond, *args, **kwargs):
1358     """Log an error message if the passed condition is True.
1359
1360     """
1361     cond = bool(cond) or self.op.debug_simulate_errors
1362     if cond:
1363       self._Error(*args, **kwargs)
1364     # do not mark the operation as failed for WARN cases only
1365     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1366       self.bad = self.bad or cond
1367
1368   def _VerifyNode(self, ninfo, nresult):
1369     """Run multiple tests against a node.
1370
1371     Test list:
1372
1373       - compares ganeti version
1374       - checks vg existence and size > 20G
1375       - checks config file checksum
1376       - checks ssh to other nodes
1377
1378     @type ninfo: L{objects.Node}
1379     @param ninfo: the node to check
1380     @param nresult: the results from the node
1381     @rtype: boolean
1382     @return: whether overall this call was successful (and we can expect
1383          reasonable values in the respose)
1384
1385     """
1386     node = ninfo.name
1387     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1388
1389     # main result, nresult should be a non-empty dict
1390     test = not nresult or not isinstance(nresult, dict)
1391     _ErrorIf(test, self.ENODERPC, node,
1392                   "unable to verify node: no data returned")
1393     if test:
1394       return False
1395
1396     # compares ganeti version
1397     local_version = constants.PROTOCOL_VERSION
1398     remote_version = nresult.get("version", None)
1399     test = not (remote_version and
1400                 isinstance(remote_version, (list, tuple)) and
1401                 len(remote_version) == 2)
1402     _ErrorIf(test, self.ENODERPC, node,
1403              "connection to node returned invalid data")
1404     if test:
1405       return False
1406
1407     test = local_version != remote_version[0]
1408     _ErrorIf(test, self.ENODEVERSION, node,
1409              "incompatible protocol versions: master %s,"
1410              " node %s", local_version, remote_version[0])
1411     if test:
1412       return False
1413
1414     # node seems compatible, we can actually try to look into its results
1415
1416     # full package version
1417     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1418                   self.ENODEVERSION, node,
1419                   "software version mismatch: master %s, node %s",
1420                   constants.RELEASE_VERSION, remote_version[1],
1421                   code=self.ETYPE_WARNING)
1422
1423     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1424     if isinstance(hyp_result, dict):
1425       for hv_name, hv_result in hyp_result.iteritems():
1426         test = hv_result is not None
1427         _ErrorIf(test, self.ENODEHV, node,
1428                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1429
1430
1431     test = nresult.get(constants.NV_NODESETUP,
1432                            ["Missing NODESETUP results"])
1433     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1434              "; ".join(test))
1435
1436     return True
1437
1438   def _VerifyNodeTime(self, ninfo, nresult,
1439                       nvinfo_starttime, nvinfo_endtime):
1440     """Check the node time.
1441
1442     @type ninfo: L{objects.Node}
1443     @param ninfo: the node to check
1444     @param nresult: the remote results for the node
1445     @param nvinfo_starttime: the start time of the RPC call
1446     @param nvinfo_endtime: the end time of the RPC call
1447
1448     """
1449     node = ninfo.name
1450     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1451
1452     ntime = nresult.get(constants.NV_TIME, None)
1453     try:
1454       ntime_merged = utils.MergeTime(ntime)
1455     except (ValueError, TypeError):
1456       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1457       return
1458
1459     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1460       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1461     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1462       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1463     else:
1464       ntime_diff = None
1465
1466     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1467              "Node time diverges by at least %s from master node time",
1468              ntime_diff)
1469
1470   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1471     """Check the node time.
1472
1473     @type ninfo: L{objects.Node}
1474     @param ninfo: the node to check
1475     @param nresult: the remote results for the node
1476     @param vg_name: the configured VG name
1477
1478     """
1479     if vg_name is None:
1480       return
1481
1482     node = ninfo.name
1483     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1484
1485     # checks vg existence and size > 20G
1486     vglist = nresult.get(constants.NV_VGLIST, None)
1487     test = not vglist
1488     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1489     if not test:
1490       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1491                                             constants.MIN_VG_SIZE)
1492       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1493
1494     # check pv names
1495     pvlist = nresult.get(constants.NV_PVLIST, None)
1496     test = pvlist is None
1497     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1498     if not test:
1499       # check that ':' is not present in PV names, since it's a
1500       # special character for lvcreate (denotes the range of PEs to
1501       # use on the PV)
1502       for _, pvname, owner_vg in pvlist:
1503         test = ":" in pvname
1504         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1505                  " '%s' of VG '%s'", pvname, owner_vg)
1506
1507   def _VerifyNodeNetwork(self, ninfo, nresult):
1508     """Check the node time.
1509
1510     @type ninfo: L{objects.Node}
1511     @param ninfo: the node to check
1512     @param nresult: the remote results for the node
1513
1514     """
1515     node = ninfo.name
1516     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1517
1518     test = constants.NV_NODELIST not in nresult
1519     _ErrorIf(test, self.ENODESSH, node,
1520              "node hasn't returned node ssh connectivity data")
1521     if not test:
1522       if nresult[constants.NV_NODELIST]:
1523         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1524           _ErrorIf(True, self.ENODESSH, node,
1525                    "ssh communication with node '%s': %s", a_node, a_msg)
1526
1527     test = constants.NV_NODENETTEST not in nresult
1528     _ErrorIf(test, self.ENODENET, node,
1529              "node hasn't returned node tcp connectivity data")
1530     if not test:
1531       if nresult[constants.NV_NODENETTEST]:
1532         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1533         for anode in nlist:
1534           _ErrorIf(True, self.ENODENET, node,
1535                    "tcp communication with node '%s': %s",
1536                    anode, nresult[constants.NV_NODENETTEST][anode])
1537
1538     test = constants.NV_MASTERIP not in nresult
1539     _ErrorIf(test, self.ENODENET, node,
1540              "node hasn't returned node master IP reachability data")
1541     if not test:
1542       if not nresult[constants.NV_MASTERIP]:
1543         if node == self.master_node:
1544           msg = "the master node cannot reach the master IP (not configured?)"
1545         else:
1546           msg = "cannot reach the master IP"
1547         _ErrorIf(True, self.ENODENET, node, msg)
1548
1549
1550   def _VerifyInstance(self, instance, instanceconfig, node_image):
1551     """Verify an instance.
1552
1553     This function checks to see if the required block devices are
1554     available on the instance's node.
1555
1556     """
1557     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1558     node_current = instanceconfig.primary_node
1559
1560     node_vol_should = {}
1561     instanceconfig.MapLVsByNode(node_vol_should)
1562
1563     for node in node_vol_should:
1564       n_img = node_image[node]
1565       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1566         # ignore missing volumes on offline or broken nodes
1567         continue
1568       for volume in node_vol_should[node]:
1569         test = volume not in n_img.volumes
1570         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1571                  "volume %s missing on node %s", volume, node)
1572
1573     if instanceconfig.admin_up:
1574       pri_img = node_image[node_current]
1575       test = instance not in pri_img.instances and not pri_img.offline
1576       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1577                "instance not running on its primary node %s",
1578                node_current)
1579
1580     for node, n_img in node_image.items():
1581       if (not node == node_current):
1582         test = instance in n_img.instances
1583         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1584                  "instance should not run on node %s", node)
1585
1586   def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1587     """Verify if there are any unknown volumes in the cluster.
1588
1589     The .os, .swap and backup volumes are ignored. All other volumes are
1590     reported as unknown.
1591
1592     """
1593     for node, n_img in node_image.items():
1594       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1595         # skip non-healthy nodes
1596         continue
1597       for volume in n_img.volumes:
1598         test = (node not in node_vol_should or
1599                 volume not in node_vol_should[node])
1600         self._ErrorIf(test, self.ENODEORPHANLV, node,
1601                       "volume %s is unknown", volume)
1602
1603   def _VerifyOrphanInstances(self, instancelist, node_image):
1604     """Verify the list of running instances.
1605
1606     This checks what instances are running but unknown to the cluster.
1607
1608     """
1609     for node, n_img in node_image.items():
1610       for o_inst in n_img.instances:
1611         test = o_inst not in instancelist
1612         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1613                       "instance %s on node %s should not exist", o_inst, node)
1614
1615   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1616     """Verify N+1 Memory Resilience.
1617
1618     Check that if one single node dies we can still start all the
1619     instances it was primary for.
1620
1621     """
1622     for node, n_img in node_image.items():
1623       # This code checks that every node which is now listed as
1624       # secondary has enough memory to host all instances it is
1625       # supposed to should a single other node in the cluster fail.
1626       # FIXME: not ready for failover to an arbitrary node
1627       # FIXME: does not support file-backed instances
1628       # WARNING: we currently take into account down instances as well
1629       # as up ones, considering that even if they're down someone
1630       # might want to start them even in the event of a node failure.
1631       for prinode, instances in n_img.sbp.items():
1632         needed_mem = 0
1633         for instance in instances:
1634           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1635           if bep[constants.BE_AUTO_BALANCE]:
1636             needed_mem += bep[constants.BE_MEMORY]
1637         test = n_img.mfree < needed_mem
1638         self._ErrorIf(test, self.ENODEN1, node,
1639                       "not enough memory on to accommodate"
1640                       " failovers should peer node %s fail", prinode)
1641
1642   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1643                        master_files):
1644     """Verifies and computes the node required file checksums.
1645
1646     @type ninfo: L{objects.Node}
1647     @param ninfo: the node to check
1648     @param nresult: the remote results for the node
1649     @param file_list: required list of files
1650     @param local_cksum: dictionary of local files and their checksums
1651     @param master_files: list of files that only masters should have
1652
1653     """
1654     node = ninfo.name
1655     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1656
1657     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1658     test = not isinstance(remote_cksum, dict)
1659     _ErrorIf(test, self.ENODEFILECHECK, node,
1660              "node hasn't returned file checksum data")
1661     if test:
1662       return
1663
1664     for file_name in file_list:
1665       node_is_mc = ninfo.master_candidate
1666       must_have = (file_name not in master_files) or node_is_mc
1667       # missing
1668       test1 = file_name not in remote_cksum
1669       # invalid checksum
1670       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1671       # existing and good
1672       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1673       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1674                "file '%s' missing", file_name)
1675       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1676                "file '%s' has wrong checksum", file_name)
1677       # not candidate and this is not a must-have file
1678       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1679                "file '%s' should not exist on non master"
1680                " candidates (and the file is outdated)", file_name)
1681       # all good, except non-master/non-must have combination
1682       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1683                "file '%s' should not exist"
1684                " on non master candidates", file_name)
1685
1686   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1687     """Verifies and the node DRBD status.
1688
1689     @type ninfo: L{objects.Node}
1690     @param ninfo: the node to check
1691     @param nresult: the remote results for the node
1692     @param instanceinfo: the dict of instances
1693     @param drbd_map: the DRBD map as returned by
1694         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1695
1696     """
1697     node = ninfo.name
1698     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1699
1700     # compute the DRBD minors
1701     node_drbd = {}
1702     for minor, instance in drbd_map[node].items():
1703       test = instance not in instanceinfo
1704       _ErrorIf(test, self.ECLUSTERCFG, None,
1705                "ghost instance '%s' in temporary DRBD map", instance)
1706         # ghost instance should not be running, but otherwise we
1707         # don't give double warnings (both ghost instance and
1708         # unallocated minor in use)
1709       if test:
1710         node_drbd[minor] = (instance, False)
1711       else:
1712         instance = instanceinfo[instance]
1713         node_drbd[minor] = (instance.name, instance.admin_up)
1714
1715     # and now check them
1716     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1717     test = not isinstance(used_minors, (tuple, list))
1718     _ErrorIf(test, self.ENODEDRBD, node,
1719              "cannot parse drbd status file: %s", str(used_minors))
1720     if test:
1721       # we cannot check drbd status
1722       return
1723
1724     for minor, (iname, must_exist) in node_drbd.items():
1725       test = minor not in used_minors and must_exist
1726       _ErrorIf(test, self.ENODEDRBD, node,
1727                "drbd minor %d of instance %s is not active", minor, iname)
1728     for minor in used_minors:
1729       test = minor not in node_drbd
1730       _ErrorIf(test, self.ENODEDRBD, node,
1731                "unallocated drbd minor %d is in use", minor)
1732
1733   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1734     """Builds the node OS structures.
1735
1736     @type ninfo: L{objects.Node}
1737     @param ninfo: the node to check
1738     @param nresult: the remote results for the node
1739     @param nimg: the node image object
1740
1741     """
1742     node = ninfo.name
1743     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1744
1745     remote_os = nresult.get(constants.NV_OSLIST, None)
1746     test = (not isinstance(remote_os, list) or
1747             not compat.all(remote_os,
1748                            lambda v: isinstance(v, list) and len(v) == 7))
1749
1750     _ErrorIf(test, self.ENODEOS, node,
1751              "node hasn't returned valid OS data")
1752
1753     nimg.os_fail = test
1754
1755     if test:
1756       return
1757
1758     os_dict = {}
1759
1760     for (name, os_path, status, diagnose,
1761          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1762
1763       if name not in os_dict:
1764         os_dict[name] = []
1765
1766       # parameters is a list of lists instead of list of tuples due to
1767       # JSON lacking a real tuple type, fix it:
1768       parameters = [tuple(v) for v in parameters]
1769       os_dict[name].append((os_path, status, diagnose,
1770                             set(variants), set(parameters), set(api_ver)))
1771
1772     nimg.oslist = os_dict
1773
1774   def _VerifyNodeOS(self, ninfo, nimg, base):
1775     """Verifies the node OS list.
1776
1777     @type ninfo: L{objects.Node}
1778     @param ninfo: the node to check
1779     @param nimg: the node image object
1780     @param base: the 'template' node we match against (e.g. from the master)
1781
1782     """
1783     node = ninfo.name
1784     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1785
1786     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1787
1788     for os_name, os_data in nimg.oslist.items():
1789       assert os_data, "Empty OS status for OS %s?!" % os_name
1790       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1791       _ErrorIf(not f_status, self.ENODEOS, node,
1792                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1793       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1794                "OS '%s' has multiple entries (first one shadows the rest): %s",
1795                os_name, utils.CommaJoin([v[0] for v in os_data]))
1796       # this will catched in backend too
1797       _ErrorIf(compat.any(f_api, lambda v: v >= constants.OS_API_V15)
1798                and not f_var, self.ENODEOS, node,
1799                "OS %s with API at least %d does not declare any variant",
1800                os_name, constants.OS_API_V15)
1801       # comparisons with the 'base' image
1802       test = os_name not in base.oslist
1803       _ErrorIf(test, self.ENODEOS, node,
1804                "Extra OS %s not present on reference node (%s)",
1805                os_name, base.name)
1806       if test:
1807         continue
1808       assert base.oslist[os_name], "Base node has empty OS status?"
1809       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1810       if not b_status:
1811         # base OS is invalid, skipping
1812         continue
1813       for kind, a, b in [("API version", f_api, b_api),
1814                          ("variants list", f_var, b_var),
1815                          ("parameters", f_param, b_param)]:
1816         _ErrorIf(a != b, self.ENODEOS, node,
1817                  "OS %s %s differs from reference node %s: %s vs. %s",
1818                  kind, os_name, base.name,
1819                  utils.CommaJoin(a), utils.CommaJoin(b))
1820
1821     # check any missing OSes
1822     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1823     _ErrorIf(missing, self.ENODEOS, node,
1824              "OSes present on reference node %s but missing on this node: %s",
1825              base.name, utils.CommaJoin(missing))
1826
1827   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1828     """Verifies and updates the node volume data.
1829
1830     This function will update a L{NodeImage}'s internal structures
1831     with data from the remote call.
1832
1833     @type ninfo: L{objects.Node}
1834     @param ninfo: the node to check
1835     @param nresult: the remote results for the node
1836     @param nimg: the node image object
1837     @param vg_name: the configured VG name
1838
1839     """
1840     node = ninfo.name
1841     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1842
1843     nimg.lvm_fail = True
1844     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1845     if vg_name is None:
1846       pass
1847     elif isinstance(lvdata, basestring):
1848       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1849                utils.SafeEncode(lvdata))
1850     elif not isinstance(lvdata, dict):
1851       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1852     else:
1853       nimg.volumes = lvdata
1854       nimg.lvm_fail = False
1855
1856   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1857     """Verifies and updates the node instance list.
1858
1859     If the listing was successful, then updates this node's instance
1860     list. Otherwise, it marks the RPC call as failed for the instance
1861     list key.
1862
1863     @type ninfo: L{objects.Node}
1864     @param ninfo: the node to check
1865     @param nresult: the remote results for the node
1866     @param nimg: the node image object
1867
1868     """
1869     idata = nresult.get(constants.NV_INSTANCELIST, None)
1870     test = not isinstance(idata, list)
1871     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1872                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1873     if test:
1874       nimg.hyp_fail = True
1875     else:
1876       nimg.instances = idata
1877
1878   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1879     """Verifies and computes a node information map
1880
1881     @type ninfo: L{objects.Node}
1882     @param ninfo: the node to check
1883     @param nresult: the remote results for the node
1884     @param nimg: the node image object
1885     @param vg_name: the configured VG name
1886
1887     """
1888     node = ninfo.name
1889     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1890
1891     # try to read free memory (from the hypervisor)
1892     hv_info = nresult.get(constants.NV_HVINFO, None)
1893     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1894     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1895     if not test:
1896       try:
1897         nimg.mfree = int(hv_info["memory_free"])
1898       except (ValueError, TypeError):
1899         _ErrorIf(True, self.ENODERPC, node,
1900                  "node returned invalid nodeinfo, check hypervisor")
1901
1902     # FIXME: devise a free space model for file based instances as well
1903     if vg_name is not None:
1904       test = (constants.NV_VGLIST not in nresult or
1905               vg_name not in nresult[constants.NV_VGLIST])
1906       _ErrorIf(test, self.ENODELVM, node,
1907                "node didn't return data for the volume group '%s'"
1908                " - it is either missing or broken", vg_name)
1909       if not test:
1910         try:
1911           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1912         except (ValueError, TypeError):
1913           _ErrorIf(True, self.ENODERPC, node,
1914                    "node returned invalid LVM info, check LVM status")
1915
1916   def BuildHooksEnv(self):
1917     """Build hooks env.
1918
1919     Cluster-Verify hooks just ran in the post phase and their failure makes
1920     the output be logged in the verify output and the verification to fail.
1921
1922     """
1923     all_nodes = self.cfg.GetNodeList()
1924     env = {
1925       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1926       }
1927     for node in self.cfg.GetAllNodesInfo().values():
1928       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1929
1930     return env, [], all_nodes
1931
1932   def Exec(self, feedback_fn):
1933     """Verify integrity of cluster, performing various test on nodes.
1934
1935     """
1936     self.bad = False
1937     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1938     verbose = self.op.verbose
1939     self._feedback_fn = feedback_fn
1940     feedback_fn("* Verifying global settings")
1941     for msg in self.cfg.VerifyConfig():
1942       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1943
1944     # Check the cluster certificates
1945     for cert_filename in constants.ALL_CERT_FILES:
1946       (errcode, msg) = _VerifyCertificate(cert_filename)
1947       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1948
1949     vg_name = self.cfg.GetVGName()
1950     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1951     cluster = self.cfg.GetClusterInfo()
1952     nodelist = utils.NiceSort(self.cfg.GetNodeList())
1953     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1954     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1955     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1956                         for iname in instancelist)
1957     i_non_redundant = [] # Non redundant instances
1958     i_non_a_balanced = [] # Non auto-balanced instances
1959     n_offline = 0 # Count of offline nodes
1960     n_drained = 0 # Count of nodes being drained
1961     node_vol_should = {}
1962
1963     # FIXME: verify OS list
1964     # do local checksums
1965     master_files = [constants.CLUSTER_CONF_FILE]
1966     master_node = self.master_node = self.cfg.GetMasterNode()
1967     master_ip = self.cfg.GetMasterIP()
1968
1969     file_names = ssconf.SimpleStore().GetFileList()
1970     file_names.extend(constants.ALL_CERT_FILES)
1971     file_names.extend(master_files)
1972     if cluster.modify_etc_hosts:
1973       file_names.append(constants.ETC_HOSTS)
1974
1975     local_checksums = utils.FingerprintFiles(file_names)
1976
1977     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1978     node_verify_param = {
1979       constants.NV_FILELIST: file_names,
1980       constants.NV_NODELIST: [node.name for node in nodeinfo
1981                               if not node.offline],
1982       constants.NV_HYPERVISOR: hypervisors,
1983       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1984                                   node.secondary_ip) for node in nodeinfo
1985                                  if not node.offline],
1986       constants.NV_INSTANCELIST: hypervisors,
1987       constants.NV_VERSION: None,
1988       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1989       constants.NV_NODESETUP: None,
1990       constants.NV_TIME: None,
1991       constants.NV_MASTERIP: (master_node, master_ip),
1992       constants.NV_OSLIST: None,
1993       }
1994
1995     if vg_name is not None:
1996       node_verify_param[constants.NV_VGLIST] = None
1997       node_verify_param[constants.NV_LVLIST] = vg_name
1998       node_verify_param[constants.NV_PVLIST] = [vg_name]
1999       node_verify_param[constants.NV_DRBDLIST] = None
2000
2001     # Build our expected cluster state
2002     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2003                                                  name=node.name))
2004                       for node in nodeinfo)
2005
2006     for instance in instancelist:
2007       inst_config = instanceinfo[instance]
2008
2009       for nname in inst_config.all_nodes:
2010         if nname not in node_image:
2011           # ghost node
2012           gnode = self.NodeImage(name=nname)
2013           gnode.ghost = True
2014           node_image[nname] = gnode
2015
2016       inst_config.MapLVsByNode(node_vol_should)
2017
2018       pnode = inst_config.primary_node
2019       node_image[pnode].pinst.append(instance)
2020
2021       for snode in inst_config.secondary_nodes:
2022         nimg = node_image[snode]
2023         nimg.sinst.append(instance)
2024         if pnode not in nimg.sbp:
2025           nimg.sbp[pnode] = []
2026         nimg.sbp[pnode].append(instance)
2027
2028     # At this point, we have the in-memory data structures complete,
2029     # except for the runtime information, which we'll gather next
2030
2031     # Due to the way our RPC system works, exact response times cannot be
2032     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2033     # time before and after executing the request, we can at least have a time
2034     # window.
2035     nvinfo_starttime = time.time()
2036     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2037                                            self.cfg.GetClusterName())
2038     nvinfo_endtime = time.time()
2039
2040     all_drbd_map = self.cfg.ComputeDRBDMap()
2041
2042     feedback_fn("* Verifying node status")
2043
2044     refos_img = None
2045
2046     for node_i in nodeinfo:
2047       node = node_i.name
2048       nimg = node_image[node]
2049
2050       if node_i.offline:
2051         if verbose:
2052           feedback_fn("* Skipping offline node %s" % (node,))
2053         n_offline += 1
2054         continue
2055
2056       if node == master_node:
2057         ntype = "master"
2058       elif node_i.master_candidate:
2059         ntype = "master candidate"
2060       elif node_i.drained:
2061         ntype = "drained"
2062         n_drained += 1
2063       else:
2064         ntype = "regular"
2065       if verbose:
2066         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2067
2068       msg = all_nvinfo[node].fail_msg
2069       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2070       if msg:
2071         nimg.rpc_fail = True
2072         continue
2073
2074       nresult = all_nvinfo[node].payload
2075
2076       nimg.call_ok = self._VerifyNode(node_i, nresult)
2077       self._VerifyNodeNetwork(node_i, nresult)
2078       self._VerifyNodeLVM(node_i, nresult, vg_name)
2079       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2080                             master_files)
2081       self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
2082       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2083
2084       self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2085       self._UpdateNodeInstances(node_i, nresult, nimg)
2086       self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2087       self._UpdateNodeOS(node_i, nresult, nimg)
2088       if not nimg.os_fail:
2089         if refos_img is None:
2090           refos_img = nimg
2091         self._VerifyNodeOS(node_i, nimg, refos_img)
2092
2093     feedback_fn("* Verifying instance status")
2094     for instance in instancelist:
2095       if verbose:
2096         feedback_fn("* Verifying instance %s" % instance)
2097       inst_config = instanceinfo[instance]
2098       self._VerifyInstance(instance, inst_config, node_image)
2099       inst_nodes_offline = []
2100
2101       pnode = inst_config.primary_node
2102       pnode_img = node_image[pnode]
2103       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2104                self.ENODERPC, pnode, "instance %s, connection to"
2105                " primary node failed", instance)
2106
2107       if pnode_img.offline:
2108         inst_nodes_offline.append(pnode)
2109
2110       # If the instance is non-redundant we cannot survive losing its primary
2111       # node, so we are not N+1 compliant. On the other hand we have no disk
2112       # templates with more than one secondary so that situation is not well
2113       # supported either.
2114       # FIXME: does not support file-backed instances
2115       if not inst_config.secondary_nodes:
2116         i_non_redundant.append(instance)
2117       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2118                instance, "instance has multiple secondary nodes: %s",
2119                utils.CommaJoin(inst_config.secondary_nodes),
2120                code=self.ETYPE_WARNING)
2121
2122       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2123         i_non_a_balanced.append(instance)
2124
2125       for snode in inst_config.secondary_nodes:
2126         s_img = node_image[snode]
2127         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2128                  "instance %s, connection to secondary node failed", instance)
2129
2130         if s_img.offline:
2131           inst_nodes_offline.append(snode)
2132
2133       # warn that the instance lives on offline nodes
2134       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2135                "instance lives on offline node(s) %s",
2136                utils.CommaJoin(inst_nodes_offline))
2137       # ... or ghost nodes
2138       for node in inst_config.all_nodes:
2139         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2140                  "instance lives on ghost node %s", node)
2141
2142     feedback_fn("* Verifying orphan volumes")
2143     self._VerifyOrphanVolumes(node_vol_should, node_image)
2144
2145     feedback_fn("* Verifying orphan instances")
2146     self._VerifyOrphanInstances(instancelist, node_image)
2147
2148     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2149       feedback_fn("* Verifying N+1 Memory redundancy")
2150       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2151
2152     feedback_fn("* Other Notes")
2153     if i_non_redundant:
2154       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2155                   % len(i_non_redundant))
2156
2157     if i_non_a_balanced:
2158       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2159                   % len(i_non_a_balanced))
2160
2161     if n_offline:
2162       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2163
2164     if n_drained:
2165       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2166
2167     return not self.bad
2168
2169   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2170     """Analyze the post-hooks' result
2171
2172     This method analyses the hook result, handles it, and sends some
2173     nicely-formatted feedback back to the user.
2174
2175     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2176         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2177     @param hooks_results: the results of the multi-node hooks rpc call
2178     @param feedback_fn: function used send feedback back to the caller
2179     @param lu_result: previous Exec result
2180     @return: the new Exec result, based on the previous result
2181         and hook results
2182
2183     """
2184     # We only really run POST phase hooks, and are only interested in
2185     # their results
2186     if phase == constants.HOOKS_PHASE_POST:
2187       # Used to change hooks' output to proper indentation
2188       indent_re = re.compile('^', re.M)
2189       feedback_fn("* Hooks Results")
2190       assert hooks_results, "invalid result from hooks"
2191
2192       for node_name in hooks_results:
2193         res = hooks_results[node_name]
2194         msg = res.fail_msg
2195         test = msg and not res.offline
2196         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2197                       "Communication failure in hooks execution: %s", msg)
2198         if res.offline or msg:
2199           # No need to investigate payload if node is offline or gave an error.
2200           # override manually lu_result here as _ErrorIf only
2201           # overrides self.bad
2202           lu_result = 1
2203           continue
2204         for script, hkr, output in res.payload:
2205           test = hkr == constants.HKR_FAIL
2206           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2207                         "Script %s failed, output:", script)
2208           if test:
2209             output = indent_re.sub('      ', output)
2210             feedback_fn("%s" % output)
2211             lu_result = 0
2212
2213       return lu_result
2214
2215
2216 class LUVerifyDisks(NoHooksLU):
2217   """Verifies the cluster disks status.
2218
2219   """
2220   _OP_REQP = []
2221   REQ_BGL = False
2222
2223   def ExpandNames(self):
2224     self.needed_locks = {
2225       locking.LEVEL_NODE: locking.ALL_SET,
2226       locking.LEVEL_INSTANCE: locking.ALL_SET,
2227     }
2228     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2229
2230   def Exec(self, feedback_fn):
2231     """Verify integrity of cluster disks.
2232
2233     @rtype: tuple of three items
2234     @return: a tuple of (dict of node-to-node_error, list of instances
2235         which need activate-disks, dict of instance: (node, volume) for
2236         missing volumes
2237
2238     """
2239     result = res_nodes, res_instances, res_missing = {}, [], {}
2240
2241     vg_name = self.cfg.GetVGName()
2242     nodes = utils.NiceSort(self.cfg.GetNodeList())
2243     instances = [self.cfg.GetInstanceInfo(name)
2244                  for name in self.cfg.GetInstanceList()]
2245
2246     nv_dict = {}
2247     for inst in instances:
2248       inst_lvs = {}
2249       if (not inst.admin_up or
2250           inst.disk_template not in constants.DTS_NET_MIRROR):
2251         continue
2252       inst.MapLVsByNode(inst_lvs)
2253       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2254       for node, vol_list in inst_lvs.iteritems():
2255         for vol in vol_list:
2256           nv_dict[(node, vol)] = inst
2257
2258     if not nv_dict:
2259       return result
2260
2261     node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2262
2263     for node in nodes:
2264       # node_volume
2265       node_res = node_lvs[node]
2266       if node_res.offline:
2267         continue
2268       msg = node_res.fail_msg
2269       if msg:
2270         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2271         res_nodes[node] = msg
2272         continue
2273
2274       lvs = node_res.payload
2275       for lv_name, (_, _, lv_online) in lvs.items():
2276         inst = nv_dict.pop((node, lv_name), None)
2277         if (not lv_online and inst is not None
2278             and inst.name not in res_instances):
2279           res_instances.append(inst.name)
2280
2281     # any leftover items in nv_dict are missing LVs, let's arrange the
2282     # data better
2283     for key, inst in nv_dict.iteritems():
2284       if inst.name not in res_missing:
2285         res_missing[inst.name] = []
2286       res_missing[inst.name].append(key)
2287
2288     return result
2289
2290
2291 class LURepairDiskSizes(NoHooksLU):
2292   """Verifies the cluster disks sizes.
2293
2294   """
2295   _OP_REQP = [("instances", _TListOf(_TNonEmptyString))]
2296   REQ_BGL = False
2297
2298   def ExpandNames(self):
2299     if self.op.instances:
2300       self.wanted_names = []
2301       for name in self.op.instances:
2302         full_name = _ExpandInstanceName(self.cfg, name)
2303         self.wanted_names.append(full_name)
2304       self.needed_locks = {
2305         locking.LEVEL_NODE: [],
2306         locking.LEVEL_INSTANCE: self.wanted_names,
2307         }
2308       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2309     else:
2310       self.wanted_names = None
2311       self.needed_locks = {
2312         locking.LEVEL_NODE: locking.ALL_SET,
2313         locking.LEVEL_INSTANCE: locking.ALL_SET,
2314         }
2315     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2316
2317   def DeclareLocks(self, level):
2318     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2319       self._LockInstancesNodes(primary_only=True)
2320
2321   def CheckPrereq(self):
2322     """Check prerequisites.
2323
2324     This only checks the optional instance list against the existing names.
2325
2326     """
2327     if self.wanted_names is None:
2328       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2329
2330     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2331                              in self.wanted_names]
2332
2333   def _EnsureChildSizes(self, disk):
2334     """Ensure children of the disk have the needed disk size.
2335
2336     This is valid mainly for DRBD8 and fixes an issue where the
2337     children have smaller disk size.
2338
2339     @param disk: an L{ganeti.objects.Disk} object
2340
2341     """
2342     if disk.dev_type == constants.LD_DRBD8:
2343       assert disk.children, "Empty children for DRBD8?"
2344       fchild = disk.children[0]
2345       mismatch = fchild.size < disk.size
2346       if mismatch:
2347         self.LogInfo("Child disk has size %d, parent %d, fixing",
2348                      fchild.size, disk.size)
2349         fchild.size = disk.size
2350
2351       # and we recurse on this child only, not on the metadev
2352       return self._EnsureChildSizes(fchild) or mismatch
2353     else:
2354       return False
2355
2356   def Exec(self, feedback_fn):
2357     """Verify the size of cluster disks.
2358
2359     """
2360     # TODO: check child disks too
2361     # TODO: check differences in size between primary/secondary nodes
2362     per_node_disks = {}
2363     for instance in self.wanted_instances:
2364       pnode = instance.primary_node
2365       if pnode not in per_node_disks:
2366         per_node_disks[pnode] = []
2367       for idx, disk in enumerate(instance.disks):
2368         per_node_disks[pnode].append((instance, idx, disk))
2369
2370     changed = []
2371     for node, dskl in per_node_disks.items():
2372       newl = [v[2].Copy() for v in dskl]
2373       for dsk in newl:
2374         self.cfg.SetDiskID(dsk, node)
2375       result = self.rpc.call_blockdev_getsizes(node, newl)
2376       if result.fail_msg:
2377         self.LogWarning("Failure in blockdev_getsizes call to node"
2378                         " %s, ignoring", node)
2379         continue
2380       if len(result.data) != len(dskl):
2381         self.LogWarning("Invalid result from node %s, ignoring node results",
2382                         node)
2383         continue
2384       for ((instance, idx, disk), size) in zip(dskl, result.data):
2385         if size is None:
2386           self.LogWarning("Disk %d of instance %s did not return size"
2387                           " information, ignoring", idx, instance.name)
2388           continue
2389         if not isinstance(size, (int, long)):
2390           self.LogWarning("Disk %d of instance %s did not return valid"
2391                           " size information, ignoring", idx, instance.name)
2392           continue
2393         size = size >> 20
2394         if size != disk.size:
2395           self.LogInfo("Disk %d of instance %s has mismatched size,"
2396                        " correcting: recorded %d, actual %d", idx,
2397                        instance.name, disk.size, size)
2398           disk.size = size
2399           self.cfg.Update(instance, feedback_fn)
2400           changed.append((instance.name, idx, size))
2401         if self._EnsureChildSizes(disk):
2402           self.cfg.Update(instance, feedback_fn)
2403           changed.append((instance.name, idx, disk.size))
2404     return changed
2405
2406
2407 class LURenameCluster(LogicalUnit):
2408   """Rename the cluster.
2409
2410   """
2411   HPATH = "cluster-rename"
2412   HTYPE = constants.HTYPE_CLUSTER
2413   _OP_REQP = [("name", _TNonEmptyString)]
2414
2415   def BuildHooksEnv(self):
2416     """Build hooks env.
2417
2418     """
2419     env = {
2420       "OP_TARGET": self.cfg.GetClusterName(),
2421       "NEW_NAME": self.op.name,
2422       }
2423     mn = self.cfg.GetMasterNode()
2424     all_nodes = self.cfg.GetNodeList()
2425     return env, [mn], all_nodes
2426
2427   def CheckPrereq(self):
2428     """Verify that the passed name is a valid one.
2429
2430     """
2431     hostname = utils.GetHostInfo(self.op.name)
2432
2433     new_name = hostname.name
2434     self.ip = new_ip = hostname.ip
2435     old_name = self.cfg.GetClusterName()
2436     old_ip = self.cfg.GetMasterIP()
2437     if new_name == old_name and new_ip == old_ip:
2438       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2439                                  " cluster has changed",
2440                                  errors.ECODE_INVAL)
2441     if new_ip != old_ip:
2442       if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2443         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2444                                    " reachable on the network. Aborting." %
2445                                    new_ip, errors.ECODE_NOTUNIQUE)
2446
2447     self.op.name = new_name
2448
2449   def Exec(self, feedback_fn):
2450     """Rename the cluster.
2451
2452     """
2453     clustername = self.op.name
2454     ip = self.ip
2455
2456     # shutdown the master IP
2457     master = self.cfg.GetMasterNode()
2458     result = self.rpc.call_node_stop_master(master, False)
2459     result.Raise("Could not disable the master role")
2460
2461     try:
2462       cluster = self.cfg.GetClusterInfo()
2463       cluster.cluster_name = clustername
2464       cluster.master_ip = ip
2465       self.cfg.Update(cluster, feedback_fn)
2466
2467       # update the known hosts file
2468       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2469       node_list = self.cfg.GetNodeList()
2470       try:
2471         node_list.remove(master)
2472       except ValueError:
2473         pass
2474       result = self.rpc.call_upload_file(node_list,
2475                                          constants.SSH_KNOWN_HOSTS_FILE)
2476       for to_node, to_result in result.iteritems():
2477         msg = to_result.fail_msg
2478         if msg:
2479           msg = ("Copy of file %s to node %s failed: %s" %
2480                  (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2481           self.proc.LogWarning(msg)
2482
2483     finally:
2484       result = self.rpc.call_node_start_master(master, False, False)
2485       msg = result.fail_msg
2486       if msg:
2487         self.LogWarning("Could not re-enable the master role on"
2488                         " the master, please restart manually: %s", msg)
2489
2490
2491 def _RecursiveCheckIfLVMBased(disk):
2492   """Check if the given disk or its children are lvm-based.
2493
2494   @type disk: L{objects.Disk}
2495   @param disk: the disk to check
2496   @rtype: boolean
2497   @return: boolean indicating whether a LD_LV dev_type was found or not
2498
2499   """
2500   if disk.children:
2501     for chdisk in disk.children:
2502       if _RecursiveCheckIfLVMBased(chdisk):
2503         return True
2504   return disk.dev_type == constants.LD_LV
2505
2506
2507 class LUSetClusterParams(LogicalUnit):
2508   """Change the parameters of the cluster.
2509
2510   """
2511   HPATH = "cluster-modify"
2512   HTYPE = constants.HTYPE_CLUSTER
2513   _OP_REQP = [
2514     ("hvparams", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2515     ("os_hvp", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2516     ("osparams", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2517     ("enabled_hypervisors",
2518      _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2519     ]
2520   _OP_DEFS = [
2521     ("candidate_pool_size", None),
2522     ("uid_pool", None),
2523     ("add_uids", None),
2524     ("remove_uids", None),
2525     ("hvparams", None),
2526     ("os_hvp", None),
2527     ]
2528   REQ_BGL = False
2529
2530   def CheckArguments(self):
2531     """Check parameters
2532
2533     """
2534     if self.op.candidate_pool_size is not None:
2535       try:
2536         self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2537       except (ValueError, TypeError), err:
2538         raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2539                                    str(err), errors.ECODE_INVAL)
2540       if self.op.candidate_pool_size < 1:
2541         raise errors.OpPrereqError("At least one master candidate needed",
2542                                    errors.ECODE_INVAL)
2543
2544     _CheckBooleanOpField(self.op, "maintain_node_health")
2545
2546     if self.op.uid_pool:
2547       uidpool.CheckUidPool(self.op.uid_pool)
2548
2549     if self.op.add_uids:
2550       uidpool.CheckUidPool(self.op.add_uids)
2551
2552     if self.op.remove_uids:
2553       uidpool.CheckUidPool(self.op.remove_uids)
2554
2555   def ExpandNames(self):
2556     # FIXME: in the future maybe other cluster params won't require checking on
2557     # all nodes to be modified.
2558     self.needed_locks = {
2559       locking.LEVEL_NODE: locking.ALL_SET,
2560     }
2561     self.share_locks[locking.LEVEL_NODE] = 1
2562
2563   def BuildHooksEnv(self):
2564     """Build hooks env.
2565
2566     """
2567     env = {
2568       "OP_TARGET": self.cfg.GetClusterName(),
2569       "NEW_VG_NAME": self.op.vg_name,
2570       }
2571     mn = self.cfg.GetMasterNode()
2572     return env, [mn], [mn]
2573
2574   def CheckPrereq(self):
2575     """Check prerequisites.
2576
2577     This checks whether the given params don't conflict and
2578     if the given volume group is valid.
2579
2580     """
2581     if self.op.vg_name is not None and not self.op.vg_name:
2582       instances = self.cfg.GetAllInstancesInfo().values()
2583       for inst in instances:
2584         for disk in inst.disks:
2585           if _RecursiveCheckIfLVMBased(disk):
2586             raise errors.OpPrereqError("Cannot disable lvm storage while"
2587                                        " lvm-based instances exist",
2588                                        errors.ECODE_INVAL)
2589
2590     node_list = self.acquired_locks[locking.LEVEL_NODE]
2591
2592     # if vg_name not None, checks given volume group on all nodes
2593     if self.op.vg_name:
2594       vglist = self.rpc.call_vg_list(node_list)
2595       for node in node_list:
2596         msg = vglist[node].fail_msg
2597         if msg:
2598           # ignoring down node
2599           self.LogWarning("Error while gathering data on node %s"
2600                           " (ignoring node): %s", node, msg)
2601           continue
2602         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2603                                               self.op.vg_name,
2604                                               constants.MIN_VG_SIZE)
2605         if vgstatus:
2606           raise errors.OpPrereqError("Error on node '%s': %s" %
2607                                      (node, vgstatus), errors.ECODE_ENVIRON)
2608
2609     self.cluster = cluster = self.cfg.GetClusterInfo()
2610     # validate params changes
2611     if self.op.beparams:
2612       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2613       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2614
2615     if self.op.nicparams:
2616       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2617       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2618       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2619       nic_errors = []
2620
2621       # check all instances for consistency
2622       for instance in self.cfg.GetAllInstancesInfo().values():
2623         for nic_idx, nic in enumerate(instance.nics):
2624           params_copy = copy.deepcopy(nic.nicparams)
2625           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2626
2627           # check parameter syntax
2628           try:
2629             objects.NIC.CheckParameterSyntax(params_filled)
2630           except errors.ConfigurationError, err:
2631             nic_errors.append("Instance %s, nic/%d: %s" %
2632                               (instance.name, nic_idx, err))
2633
2634           # if we're moving instances to routed, check that they have an ip
2635           target_mode = params_filled[constants.NIC_MODE]
2636           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2637             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2638                               (instance.name, nic_idx))
2639       if nic_errors:
2640         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2641                                    "\n".join(nic_errors))
2642
2643     # hypervisor list/parameters
2644     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2645     if self.op.hvparams:
2646       for hv_name, hv_dict in self.op.hvparams.items():
2647         if hv_name not in self.new_hvparams:
2648           self.new_hvparams[hv_name] = hv_dict
2649         else:
2650           self.new_hvparams[hv_name].update(hv_dict)
2651
2652     # os hypervisor parameters
2653     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2654     if self.op.os_hvp:
2655       for os_name, hvs in self.op.os_hvp.items():
2656         if os_name not in self.new_os_hvp:
2657           self.new_os_hvp[os_name] = hvs
2658         else:
2659           for hv_name, hv_dict in hvs.items():
2660             if hv_name not in self.new_os_hvp[os_name]:
2661               self.new_os_hvp[os_name][hv_name] = hv_dict
2662             else:
2663               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2664
2665     # os parameters
2666     self.new_osp = objects.FillDict(cluster.osparams, {})
2667     if self.op.osparams:
2668       for os_name, osp in self.op.osparams.items():
2669         if os_name not in self.new_osp:
2670           self.new_osp[os_name] = {}
2671
2672         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2673                                                   use_none=True)
2674
2675         if not self.new_osp[os_name]:
2676           # we removed all parameters
2677           del self.new_osp[os_name]
2678         else:
2679           # check the parameter validity (remote check)
2680           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2681                          os_name, self.new_osp[os_name])
2682
2683     # changes to the hypervisor list
2684     if self.op.enabled_hypervisors is not None:
2685       self.hv_list = self.op.enabled_hypervisors
2686       for hv in self.hv_list:
2687         # if the hypervisor doesn't already exist in the cluster
2688         # hvparams, we initialize it to empty, and then (in both
2689         # cases) we make sure to fill the defaults, as we might not
2690         # have a complete defaults list if the hypervisor wasn't
2691         # enabled before
2692         if hv not in new_hvp:
2693           new_hvp[hv] = {}
2694         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2695         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2696     else:
2697       self.hv_list = cluster.enabled_hypervisors
2698
2699     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2700       # either the enabled list has changed, or the parameters have, validate
2701       for hv_name, hv_params in self.new_hvparams.items():
2702         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2703             (self.op.enabled_hypervisors and
2704              hv_name in self.op.enabled_hypervisors)):
2705           # either this is a new hypervisor, or its parameters have changed
2706           hv_class = hypervisor.GetHypervisor(hv_name)
2707           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2708           hv_class.CheckParameterSyntax(hv_params)
2709           _CheckHVParams(self, node_list, hv_name, hv_params)
2710
2711     if self.op.os_hvp:
2712       # no need to check any newly-enabled hypervisors, since the
2713       # defaults have already been checked in the above code-block
2714       for os_name, os_hvp in self.new_os_hvp.items():
2715         for hv_name, hv_params in os_hvp.items():
2716           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2717           # we need to fill in the new os_hvp on top of the actual hv_p
2718           cluster_defaults = self.new_hvparams.get(hv_name, {})
2719           new_osp = objects.FillDict(cluster_defaults, hv_params)
2720           hv_class = hypervisor.GetHypervisor(hv_name)
2721           hv_class.CheckParameterSyntax(new_osp)
2722           _CheckHVParams(self, node_list, hv_name, new_osp)
2723
2724
2725   def Exec(self, feedback_fn):
2726     """Change the parameters of the cluster.
2727
2728     """
2729     if self.op.vg_name is not None:
2730       new_volume = self.op.vg_name
2731       if not new_volume:
2732         new_volume = None
2733       if new_volume != self.cfg.GetVGName():
2734         self.cfg.SetVGName(new_volume)
2735       else:
2736         feedback_fn("Cluster LVM configuration already in desired"
2737                     " state, not changing")
2738     if self.op.hvparams:
2739       self.cluster.hvparams = self.new_hvparams
2740     if self.op.os_hvp:
2741       self.cluster.os_hvp = self.new_os_hvp
2742     if self.op.enabled_hypervisors is not None:
2743       self.cluster.hvparams = self.new_hvparams
2744       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2745     if self.op.beparams:
2746       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2747     if self.op.nicparams:
2748       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2749     if self.op.osparams:
2750       self.cluster.osparams = self.new_osp
2751
2752     if self.op.candidate_pool_size is not None:
2753       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2754       # we need to update the pool size here, otherwise the save will fail
2755       _AdjustCandidatePool(self, [])
2756
2757     if self.op.maintain_node_health is not None:
2758       self.cluster.maintain_node_health = self.op.maintain_node_health
2759
2760     if self.op.add_uids is not None:
2761       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2762
2763     if self.op.remove_uids is not None:
2764       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2765
2766     if self.op.uid_pool is not None:
2767       self.cluster.uid_pool = self.op.uid_pool
2768
2769     self.cfg.Update(self.cluster, feedback_fn)
2770
2771
2772 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2773   """Distribute additional files which are part of the cluster configuration.
2774
2775   ConfigWriter takes care of distributing the config and ssconf files, but
2776   there are more files which should be distributed to all nodes. This function
2777   makes sure those are copied.
2778
2779   @param lu: calling logical unit
2780   @param additional_nodes: list of nodes not in the config to distribute to
2781
2782   """
2783   # 1. Gather target nodes
2784   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2785   dist_nodes = lu.cfg.GetOnlineNodeList()
2786   if additional_nodes is not None:
2787     dist_nodes.extend(additional_nodes)
2788   if myself.name in dist_nodes:
2789     dist_nodes.remove(myself.name)
2790
2791   # 2. Gather files to distribute
2792   dist_files = set([constants.ETC_HOSTS,
2793                     constants.SSH_KNOWN_HOSTS_FILE,
2794                     constants.RAPI_CERT_FILE,
2795                     constants.RAPI_USERS_FILE,
2796                     constants.CONFD_HMAC_KEY,
2797                     constants.CLUSTER_DOMAIN_SECRET_FILE,
2798                    ])
2799
2800   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2801   for hv_name in enabled_hypervisors:
2802     hv_class = hypervisor.GetHypervisor(hv_name)
2803     dist_files.update(hv_class.GetAncillaryFiles())
2804
2805   # 3. Perform the files upload
2806   for fname in dist_files:
2807     if os.path.exists(fname):
2808       result = lu.rpc.call_upload_file(dist_nodes, fname)
2809       for to_node, to_result in result.items():
2810         msg = to_result.fail_msg
2811         if msg:
2812           msg = ("Copy of file %s to node %s failed: %s" %
2813                  (fname, to_node, msg))
2814           lu.proc.LogWarning(msg)
2815
2816
2817 class LURedistributeConfig(NoHooksLU):
2818   """Force the redistribution of cluster configuration.
2819
2820   This is a very simple LU.
2821
2822   """
2823   _OP_REQP = []
2824   REQ_BGL = False
2825
2826   def ExpandNames(self):
2827     self.needed_locks = {
2828       locking.LEVEL_NODE: locking.ALL_SET,
2829     }
2830     self.share_locks[locking.LEVEL_NODE] = 1
2831
2832   def Exec(self, feedback_fn):
2833     """Redistribute the configuration.
2834
2835     """
2836     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2837     _RedistributeAncillaryFiles(self)
2838
2839
2840 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2841   """Sleep and poll for an instance's disk to sync.
2842
2843   """
2844   if not instance.disks or disks is not None and not disks:
2845     return True
2846
2847   disks = _ExpandCheckDisks(instance, disks)
2848
2849   if not oneshot:
2850     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2851
2852   node = instance.primary_node
2853
2854   for dev in disks:
2855     lu.cfg.SetDiskID(dev, node)
2856
2857   # TODO: Convert to utils.Retry
2858
2859   retries = 0
2860   degr_retries = 10 # in seconds, as we sleep 1 second each time
2861   while True:
2862     max_time = 0
2863     done = True
2864     cumul_degraded = False
2865     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2866     msg = rstats.fail_msg
2867     if msg:
2868       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2869       retries += 1
2870       if retries >= 10:
2871         raise errors.RemoteError("Can't contact node %s for mirror data,"
2872                                  " aborting." % node)
2873       time.sleep(6)
2874       continue
2875     rstats = rstats.payload
2876     retries = 0
2877     for i, mstat in enumerate(rstats):
2878       if mstat is None:
2879         lu.LogWarning("Can't compute data for node %s/%s",
2880                            node, disks[i].iv_name)
2881         continue
2882
2883       cumul_degraded = (cumul_degraded or
2884                         (mstat.is_degraded and mstat.sync_percent is None))
2885       if mstat.sync_percent is not None:
2886         done = False
2887         if mstat.estimated_time is not None:
2888           rem_time = ("%s remaining (estimated)" %
2889                       utils.FormatSeconds(mstat.estimated_time))
2890           max_time = mstat.estimated_time
2891         else:
2892           rem_time = "no time estimate"
2893         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2894                         (disks[i].iv_name, mstat.sync_percent, rem_time))
2895
2896     # if we're done but degraded, let's do a few small retries, to
2897     # make sure we see a stable and not transient situation; therefore
2898     # we force restart of the loop
2899     if (done or oneshot) and cumul_degraded and degr_retries > 0:
2900       logging.info("Degraded disks found, %d retries left", degr_retries)
2901       degr_retries -= 1
2902       time.sleep(1)
2903       continue
2904
2905     if done or oneshot:
2906       break
2907
2908     time.sleep(min(60, max_time))
2909
2910   if done:
2911     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2912   return not cumul_degraded
2913
2914
2915 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2916   """Check that mirrors are not degraded.
2917
2918   The ldisk parameter, if True, will change the test from the
2919   is_degraded attribute (which represents overall non-ok status for
2920   the device(s)) to the ldisk (representing the local storage status).
2921
2922   """
2923   lu.cfg.SetDiskID(dev, node)
2924
2925   result = True
2926
2927   if on_primary or dev.AssembleOnSecondary():
2928     rstats = lu.rpc.call_blockdev_find(node, dev)
2929     msg = rstats.fail_msg
2930     if msg:
2931       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2932       result = False
2933     elif not rstats.payload:
2934       lu.LogWarning("Can't find disk on node %s", node)
2935       result = False
2936     else:
2937       if ldisk:
2938         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2939       else:
2940         result = result and not rstats.payload.is_degraded
2941
2942   if dev.children:
2943     for child in dev.children:
2944       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2945
2946   return result
2947
2948
2949 class LUDiagnoseOS(NoHooksLU):
2950   """Logical unit for OS diagnose/query.
2951
2952   """
2953   _OP_REQP = [
2954     ("output_fields", _TListOf(_TNonEmptyString)),
2955     ("names", _TListOf(_TNonEmptyString)),
2956     ]
2957   REQ_BGL = False
2958   _FIELDS_STATIC = utils.FieldSet()
2959   _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
2960                                    "parameters", "api_versions")
2961
2962   def CheckArguments(self):
2963     if self.op.names:
2964       raise errors.OpPrereqError("Selective OS query not supported",
2965                                  errors.ECODE_INVAL)
2966
2967     _CheckOutputFields(static=self._FIELDS_STATIC,
2968                        dynamic=self._FIELDS_DYNAMIC,
2969                        selected=self.op.output_fields)
2970
2971   def ExpandNames(self):
2972     # Lock all nodes, in shared mode
2973     # Temporary removal of locks, should be reverted later
2974     # TODO: reintroduce locks when they are lighter-weight
2975     self.needed_locks = {}
2976     #self.share_locks[locking.LEVEL_NODE] = 1
2977     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2978
2979   @staticmethod
2980   def _DiagnoseByOS(rlist):
2981     """Remaps a per-node return list into an a per-os per-node dictionary
2982
2983     @param rlist: a map with node names as keys and OS objects as values
2984
2985     @rtype: dict
2986     @return: a dictionary with osnames as keys and as value another
2987         map, with nodes as keys and tuples of (path, status, diagnose,
2988         variants, parameters, api_versions) as values, eg::
2989
2990           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
2991                                      (/srv/..., False, "invalid api")],
2992                            "node2": [(/srv/..., True, "", [], [])]}
2993           }
2994
2995     """
2996     all_os = {}
2997     # we build here the list of nodes that didn't fail the RPC (at RPC
2998     # level), so that nodes with a non-responding node daemon don't
2999     # make all OSes invalid
3000     good_nodes = [node_name for node_name in rlist
3001                   if not rlist[node_name].fail_msg]
3002     for node_name, nr in rlist.items():
3003       if nr.fail_msg or not nr.payload:
3004         continue
3005       for (name, path, status, diagnose, variants,
3006            params, api_versions) in nr.payload:
3007         if name not in all_os:
3008           # build a list of nodes for this os containing empty lists
3009           # for each node in node_list
3010           all_os[name] = {}
3011           for nname in good_nodes:
3012             all_os[name][nname] = []
3013         # convert params from [name, help] to (name, help)
3014         params = [tuple(v) for v in params]
3015         all_os[name][node_name].append((path, status, diagnose,
3016                                         variants, params, api_versions))
3017     return all_os
3018
3019   def Exec(self, feedback_fn):
3020     """Compute the list of OSes.
3021
3022     """
3023     valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3024     node_data = self.rpc.call_os_diagnose(valid_nodes)
3025     pol = self._DiagnoseByOS(node_data)
3026     output = []
3027
3028     for os_name, os_data in pol.items():
3029       row = []
3030       valid = True
3031       (variants, params, api_versions) = null_state = (set(), set(), set())
3032       for idx, osl in enumerate(os_data.values()):
3033         valid = bool(valid and osl and osl[0][1])
3034         if not valid:
3035           (variants, params, api_versions) = null_state
3036           break
3037         node_variants, node_params, node_api = osl[0][3:6]
3038         if idx == 0: # first entry
3039           variants = set(node_variants)
3040           params = set(node_params)
3041           api_versions = set(node_api)
3042         else: # keep consistency
3043           variants.intersection_update(node_variants)
3044           params.intersection_update(node_params)
3045           api_versions.intersection_update(node_api)
3046
3047       for field in self.op.output_fields:
3048         if field == "name":
3049           val = os_name
3050         elif field == "valid":
3051           val = valid
3052         elif field == "node_status":
3053           # this is just a copy of the dict
3054           val = {}
3055           for node_name, nos_list in os_data.items():
3056             val[node_name] = nos_list
3057         elif field == "variants":
3058           val = list(variants)
3059         elif field == "parameters":
3060           val = list(params)
3061         elif field == "api_versions":
3062           val = list(api_versions)
3063         else:
3064           raise errors.ParameterError(field)
3065         row.append(val)
3066       output.append(row)
3067
3068     return output
3069
3070
3071 class LURemoveNode(LogicalUnit):
3072   """Logical unit for removing a node.
3073
3074   """
3075   HPATH = "node-remove"
3076   HTYPE = constants.HTYPE_NODE
3077   _OP_REQP = [("node_name", _TNonEmptyString)]
3078
3079   def BuildHooksEnv(self):
3080     """Build hooks env.
3081
3082     This doesn't run on the target node in the pre phase as a failed
3083     node would then be impossible to remove.
3084
3085     """
3086     env = {
3087       "OP_TARGET": self.op.node_name,
3088       "NODE_NAME": self.op.node_name,
3089       }
3090     all_nodes = self.cfg.GetNodeList()
3091     try:
3092       all_nodes.remove(self.op.node_name)
3093     except ValueError:
3094       logging.warning("Node %s which is about to be removed not found"
3095                       " in the all nodes list", self.op.node_name)
3096     return env, all_nodes, all_nodes
3097
3098   def CheckPrereq(self):
3099     """Check prerequisites.
3100
3101     This checks:
3102      - the node exists in the configuration
3103      - it does not have primary or secondary instances
3104      - it's not the master
3105
3106     Any errors are signaled by raising errors.OpPrereqError.
3107
3108     """
3109     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3110     node = self.cfg.GetNodeInfo(self.op.node_name)
3111     assert node is not None
3112
3113     instance_list = self.cfg.GetInstanceList()
3114
3115     masternode = self.cfg.GetMasterNode()
3116     if node.name == masternode:
3117       raise errors.OpPrereqError("Node is the master node,"
3118                                  " you need to failover first.",
3119                                  errors.ECODE_INVAL)
3120
3121     for instance_name in instance_list:
3122       instance = self.cfg.GetInstanceInfo(instance_name)
3123       if node.name in instance.all_nodes:
3124         raise errors.OpPrereqError("Instance %s is still running on the node,"
3125                                    " please remove first." % instance_name,
3126                                    errors.ECODE_INVAL)
3127     self.op.node_name = node.name
3128     self.node = node
3129
3130   def Exec(self, feedback_fn):
3131     """Removes the node from the cluster.
3132
3133     """
3134     node = self.node
3135     logging.info("Stopping the node daemon and removing configs from node %s",
3136                  node.name)
3137
3138     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3139
3140     # Promote nodes to master candidate as needed
3141     _AdjustCandidatePool(self, exceptions=[node.name])
3142     self.context.RemoveNode(node.name)
3143
3144     # Run post hooks on the node before it's removed
3145     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3146     try:
3147       hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3148     except:
3149       # pylint: disable-msg=W0702
3150       self.LogWarning("Errors occurred running hooks on %s" % node.name)
3151
3152     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3153     msg = result.fail_msg
3154     if msg:
3155       self.LogWarning("Errors encountered on the remote node while leaving"
3156                       " the cluster: %s", msg)
3157
3158     # Remove node from our /etc/hosts
3159     if self.cfg.GetClusterInfo().modify_etc_hosts:
3160       # FIXME: this should be done via an rpc call to node daemon
3161       utils.RemoveHostFromEtcHosts(node.name)
3162       _RedistributeAncillaryFiles(self)
3163
3164
3165 class LUQueryNodes(NoHooksLU):
3166   """Logical unit for querying nodes.
3167
3168   """
3169   # pylint: disable-msg=W0142
3170   _OP_REQP = [
3171     ("output_fields", _TListOf(_TNonEmptyString)),
3172     ("names", _TListOf(_TNonEmptyString)),
3173     ("use_locking", _TBool),
3174     ]
3175   REQ_BGL = False
3176
3177   _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3178                     "master_candidate", "offline", "drained"]
3179
3180   _FIELDS_DYNAMIC = utils.FieldSet(
3181     "dtotal", "dfree",
3182     "mtotal", "mnode", "mfree",
3183     "bootid",
3184     "ctotal", "cnodes", "csockets",
3185     )
3186
3187   _FIELDS_STATIC = utils.FieldSet(*[
3188     "pinst_cnt", "sinst_cnt",
3189     "pinst_list", "sinst_list",
3190     "pip", "sip", "tags",
3191     "master",
3192     "role"] + _SIMPLE_FIELDS
3193     )
3194
3195   def CheckArguments(self):
3196     _CheckOutputFields(static=self._FIELDS_STATIC,
3197                        dynamic=self._FIELDS_DYNAMIC,
3198                        selected=self.op.output_fields)
3199
3200   def ExpandNames(self):
3201     self.needed_locks = {}
3202     self.share_locks[locking.LEVEL_NODE] = 1
3203
3204     if self.op.names:
3205       self.wanted = _GetWantedNodes(self, self.op.names)
3206     else:
3207       self.wanted = locking.ALL_SET
3208
3209     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3210     self.do_locking = self.do_node_query and self.op.use_locking
3211     if self.do_locking:
3212       # if we don't request only static fields, we need to lock the nodes
3213       self.needed_locks[locking.LEVEL_NODE] = self.wanted
3214
3215   def Exec(self, feedback_fn):
3216     """Computes the list of nodes and their attributes.
3217
3218     """
3219     all_info = self.cfg.GetAllNodesInfo()
3220     if self.do_locking:
3221       nodenames = self.acquired_locks[locking.LEVEL_NODE]
3222     elif self.wanted != locking.ALL_SET:
3223       nodenames = self.wanted
3224       missing = set(nodenames).difference(all_info.keys())
3225       if missing:
3226         raise errors.OpExecError(
3227           "Some nodes were removed before retrieving their data: %s" % missing)
3228     else:
3229       nodenames = all_info.keys()
3230
3231     nodenames = utils.NiceSort(nodenames)
3232     nodelist = [all_info[name] for name in nodenames]
3233
3234     # begin data gathering
3235
3236     if self.do_node_query:
3237       live_data = {}
3238       node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3239                                           self.cfg.GetHypervisorType())
3240       for name in nodenames:
3241         nodeinfo = node_data[name]
3242         if not nodeinfo.fail_msg and nodeinfo.payload:
3243           nodeinfo = nodeinfo.payload
3244           fn = utils.TryConvert
3245           live_data[name] = {
3246             "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3247             "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3248             "mfree": fn(int, nodeinfo.get('memory_free', None)),
3249             "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3250             "dfree": fn(int, nodeinfo.get('vg_free', None)),
3251             "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3252             "bootid": nodeinfo.get('bootid', None),
3253             "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3254             "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3255             }
3256         else:
3257           live_data[name] = {}
3258     else:
3259       live_data = dict.fromkeys(nodenames, {})
3260
3261     node_to_primary = dict([(name, set()) for name in nodenames])
3262     node_to_secondary = dict([(name, set()) for name in nodenames])
3263
3264     inst_fields = frozenset(("pinst_cnt", "pinst_list",
3265                              "sinst_cnt", "sinst_list"))
3266     if inst_fields & frozenset(self.op.output_fields):
3267       inst_data = self.cfg.GetAllInstancesInfo()
3268
3269       for inst in inst_data.values():
3270         if inst.primary_node in node_to_primary:
3271           node_to_primary[inst.primary_node].add(inst.name)
3272         for secnode in inst.secondary_nodes:
3273           if secnode in node_to_secondary:
3274             node_to_secondary[secnode].add(inst.name)
3275
3276     master_node = self.cfg.GetMasterNode()
3277
3278     # end data gathering
3279
3280     output = []
3281     for node in nodelist:
3282       node_output = []
3283       for field in self.op.output_fields:
3284         if field in self._SIMPLE_FIELDS:
3285           val = getattr(node, field)
3286         elif field == "pinst_list":
3287           val = list(node_to_primary[node.name])
3288         elif field == "sinst_list":
3289           val = list(node_to_secondary[node.name])
3290         elif field == "pinst_cnt":
3291           val = len(node_to_primary[node.name])
3292         elif field == "sinst_cnt":
3293           val = len(node_to_secondary[node.name])
3294         elif field == "pip":
3295           val = node.primary_ip
3296         elif field == "sip":
3297           val = node.secondary_ip
3298         elif field == "tags":
3299           val = list(node.GetTags())
3300         elif field == "master":
3301           val = node.name == master_node
3302         elif self._FIELDS_DYNAMIC.Matches(field):
3303           val = live_data[node.name].get(field, None)
3304         elif field == "role":
3305           if node.name == master_node:
3306             val = "M"
3307           elif node.master_candidate:
3308             val = "C"
3309           elif node.drained:
3310             val = "D"
3311           elif node.offline:
3312             val = "O"
3313           else:
3314             val = "R"
3315         else:
3316           raise errors.ParameterError(field)
3317         node_output.append(val)
3318       output.append(node_output)
3319
3320     return output
3321
3322
3323 class LUQueryNodeVolumes(NoHooksLU):
3324   """Logical unit for getting volumes on node(s).
3325
3326   """
3327   _OP_REQP = [
3328     ("nodes", _TListOf(_TNonEmptyString)),
3329     ("output_fields", _TListOf(_TNonEmptyString)),
3330     ]
3331   REQ_BGL = False
3332   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3333   _FIELDS_STATIC = utils.FieldSet("node")
3334
3335   def CheckArguments(self):
3336     _CheckOutputFields(static=self._FIELDS_STATIC,
3337                        dynamic=self._FIELDS_DYNAMIC,
3338                        selected=self.op.output_fields)
3339
3340   def ExpandNames(self):
3341     self.needed_locks = {}
3342     self.share_locks[locking.LEVEL_NODE] = 1
3343     if not self.op.nodes:
3344       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3345     else:
3346       self.needed_locks[locking.LEVEL_NODE] = \
3347         _GetWantedNodes(self, self.op.nodes)
3348
3349   def Exec(self, feedback_fn):
3350     """Computes the list of nodes and their attributes.
3351
3352     """
3353     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3354     volumes = self.rpc.call_node_volumes(nodenames)
3355
3356     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3357              in self.cfg.GetInstanceList()]
3358
3359     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3360
3361     output = []
3362     for node in nodenames:
3363       nresult = volumes[node]
3364       if nresult.offline:
3365         continue
3366       msg = nresult.fail_msg
3367       if msg:
3368         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3369         continue
3370
3371       node_vols = nresult.payload[:]
3372       node_vols.sort(key=lambda vol: vol['dev'])
3373
3374       for vol in node_vols:
3375         node_output = []
3376         for field in self.op.output_fields:
3377           if field == "node":
3378             val = node
3379           elif field == "phys":
3380             val = vol['dev']
3381           elif field == "vg":
3382             val = vol['vg']
3383           elif field == "name":
3384             val = vol['name']
3385           elif field == "size":
3386             val = int(float(vol['size']))
3387           elif field == "instance":
3388             for inst in ilist:
3389               if node not in lv_by_node[inst]:
3390                 continue
3391               if vol['name'] in lv_by_node[inst][node]:
3392                 val = inst.name
3393                 break
3394             else:
3395               val = '-'
3396           else:
3397             raise errors.ParameterError(field)
3398           node_output.append(str(val))
3399
3400         output.append(node_output)
3401
3402     return output
3403
3404
3405 class LUQueryNodeStorage(NoHooksLU):
3406   """Logical unit for getting information on storage units on node(s).
3407
3408   """
3409   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3410   _OP_REQP = [
3411     ("nodes", _TListOf(_TNonEmptyString)),
3412     ("storage_type", _CheckStorageType),
3413     ("output_fields", _TListOf(_TNonEmptyString)),
3414     ]
3415   _OP_DEFS = [("name", None)]
3416   REQ_BGL = False
3417
3418   def CheckArguments(self):
3419     _CheckOutputFields(static=self._FIELDS_STATIC,
3420                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3421                        selected=self.op.output_fields)
3422
3423   def ExpandNames(self):
3424     self.needed_locks = {}
3425     self.share_locks[locking.LEVEL_NODE] = 1
3426
3427     if self.op.nodes:
3428       self.needed_locks[locking.LEVEL_NODE] = \
3429         _GetWantedNodes(self, self.op.nodes)
3430     else:
3431       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3432
3433   def Exec(self, feedback_fn):
3434     """Computes the list of nodes and their attributes.
3435
3436     """
3437     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3438
3439     # Always get name to sort by
3440     if constants.SF_NAME in self.op.output_fields:
3441       fields = self.op.output_fields[:]
3442     else:
3443       fields = [constants.SF_NAME] + self.op.output_fields
3444
3445     # Never ask for node or type as it's only known to the LU
3446     for extra in [constants.SF_NODE, constants.SF_TYPE]:
3447       while extra in fields:
3448         fields.remove(extra)
3449
3450     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3451     name_idx = field_idx[constants.SF_NAME]
3452
3453     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3454     data = self.rpc.call_storage_list(self.nodes,
3455                                       self.op.storage_type, st_args,
3456                                       self.op.name, fields)
3457
3458     result = []
3459
3460     for node in utils.NiceSort(self.nodes):
3461       nresult = data[node]
3462       if nresult.offline:
3463         continue
3464
3465       msg = nresult.fail_msg
3466       if msg:
3467         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3468         continue
3469
3470       rows = dict([(row[name_idx], row) for row in nresult.payload])
3471
3472       for name in utils.NiceSort(rows.keys()):
3473         row = rows[name]
3474
3475         out = []
3476
3477         for field in self.op.output_fields:
3478           if field == constants.SF_NODE:
3479             val = node
3480           elif field == constants.SF_TYPE:
3481             val = self.op.storage_type
3482           elif field in field_idx:
3483             val = row[field_idx[field]]
3484           else:
3485             raise errors.ParameterError(field)
3486
3487           out.append(val)
3488
3489         result.append(out)
3490
3491     return result
3492
3493
3494 class LUModifyNodeStorage(NoHooksLU):
3495   """Logical unit for modifying a storage volume on a node.
3496
3497   """
3498   _OP_REQP = [
3499     ("node_name", _TNonEmptyString),
3500     ("storage_type", _CheckStorageType),
3501     ("name", _TNonEmptyString),
3502     ("changes", _TDict),
3503     ]
3504   REQ_BGL = False
3505
3506   def CheckArguments(self):
3507     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3508
3509     storage_type = self.op.storage_type
3510
3511     try:
3512       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3513     except KeyError:
3514       raise errors.OpPrereqError("Storage units of type '%s' can not be"
3515                                  " modified" % storage_type,
3516                                  errors.ECODE_INVAL)
3517
3518     diff = set(self.op.changes.keys()) - modifiable
3519     if diff:
3520       raise errors.OpPrereqError("The following fields can not be modified for"
3521                                  " storage units of type '%s': %r" %
3522                                  (storage_type, list(diff)),
3523                                  errors.ECODE_INVAL)
3524
3525   def ExpandNames(self):
3526     self.needed_locks = {
3527       locking.LEVEL_NODE: self.op.node_name,
3528       }
3529
3530   def Exec(self, feedback_fn):
3531     """Computes the list of nodes and their attributes.
3532
3533     """
3534     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3535     result = self.rpc.call_storage_modify(self.op.node_name,
3536                                           self.op.storage_type, st_args,
3537                                           self.op.name, self.op.changes)
3538     result.Raise("Failed to modify storage unit '%s' on %s" %
3539                  (self.op.name, self.op.node_name))
3540
3541
3542 class LUAddNode(LogicalUnit):
3543   """Logical unit for adding node to the cluster.
3544
3545   """
3546   HPATH = "node-add"
3547   HTYPE = constants.HTYPE_NODE
3548   _OP_REQP = [
3549     ("node_name", _TNonEmptyString),
3550     ]
3551   _OP_DEFS = [("secondary_ip", None)]
3552
3553   def CheckArguments(self):
3554     # validate/normalize the node name
3555     self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3556
3557   def BuildHooksEnv(self):
3558     """Build hooks env.
3559
3560     This will run on all nodes before, and on all nodes + the new node after.
3561
3562     """
3563     env = {
3564       "OP_TARGET": self.op.node_name,
3565       "NODE_NAME": self.op.node_name,
3566       "NODE_PIP": self.op.primary_ip,
3567       "NODE_SIP": self.op.secondary_ip,
3568       }
3569     nodes_0 = self.cfg.GetNodeList()
3570     nodes_1 = nodes_0 + [self.op.node_name, ]
3571     return env, nodes_0, nodes_1
3572
3573   def CheckPrereq(self):
3574     """Check prerequisites.
3575
3576     This checks:
3577      - the new node is not already in the config
3578      - it is resolvable
3579      - its parameters (single/dual homed) matches the cluster
3580
3581     Any errors are signaled by raising errors.OpPrereqError.
3582
3583     """
3584     node_name = self.op.node_name
3585     cfg = self.cfg
3586
3587     dns_data = utils.GetHostInfo(node_name)
3588
3589     node = dns_data.name
3590     primary_ip = self.op.primary_ip = dns_data.ip
3591     if self.op.secondary_ip is None:
3592       self.op.secondary_ip = primary_ip
3593     if not utils.IsValidIP(self.op.secondary_ip):
3594       raise errors.OpPrereqError("Invalid secondary IP given",
3595                                  errors.ECODE_INVAL)
3596     secondary_ip = self.op.secondary_ip
3597
3598     node_list = cfg.GetNodeList()
3599     if not self.op.readd and node in node_list:
3600       raise errors.OpPrereqError("Node %s is already in the configuration" %
3601                                  node, errors.ECODE_EXISTS)
3602     elif self.op.readd and node not in node_list:
3603       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3604                                  errors.ECODE_NOENT)
3605
3606     self.changed_primary_ip = False
3607
3608     for existing_node_name in node_list:
3609       existing_node = cfg.GetNodeInfo(existing_node_name)
3610
3611       if self.op.readd and node == existing_node_name:
3612         if existing_node.secondary_ip != secondary_ip:
3613           raise errors.OpPrereqError("Readded node doesn't have the same IP"
3614                                      " address configuration as before",
3615                                      errors.ECODE_INVAL)
3616         if existing_node.primary_ip != primary_ip:
3617           self.changed_primary_ip = True
3618
3619         continue
3620
3621       if (existing_node.primary_ip == primary_ip or
3622           existing_node.secondary_ip == primary_ip or
3623           existing_node.primary_ip == secondary_ip or
3624           existing_node.secondary_ip == secondary_ip):
3625         raise errors.OpPrereqError("New node ip address(es) conflict with"
3626                                    " existing node %s" % existing_node.name,
3627                                    errors.ECODE_NOTUNIQUE)
3628
3629     # check that the type of the node (single versus dual homed) is the
3630     # same as for the master
3631     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3632     master_singlehomed = myself.secondary_ip == myself.primary_ip
3633     newbie_singlehomed = secondary_ip == primary_ip
3634     if master_singlehomed != newbie_singlehomed:
3635       if master_singlehomed:
3636         raise errors.OpPrereqError("The master has no private ip but the"
3637                                    " new node has one",
3638                                    errors.ECODE_INVAL)
3639       else:
3640         raise errors.OpPrereqError("The master has a private ip but the"
3641                                    " new node doesn't have one",
3642                                    errors.ECODE_INVAL)
3643
3644     # checks reachability
3645     if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3646       raise errors.OpPrereqError("Node not reachable by ping",
3647                                  errors.ECODE_ENVIRON)
3648
3649     if not newbie_singlehomed:
3650       # check reachability from my secondary ip to newbie's secondary ip
3651       if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3652                            source=myself.secondary_ip):
3653         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3654                                    " based ping to noded port",
3655                                    errors.ECODE_ENVIRON)
3656
3657     if self.op.readd:
3658       exceptions = [node]
3659     else:
3660       exceptions = []
3661
3662     self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3663
3664     if self.op.readd:
3665       self.new_node = self.cfg.GetNodeInfo(node)
3666       assert self.new_node is not None, "Can't retrieve locked node %s" % node
3667     else:
3668       self.new_node = objects.Node(name=node,
3669                                    primary_ip=primary_ip,
3670                                    secondary_ip=secondary_ip,
3671                                    master_candidate=self.master_candidate,
3672                                    offline=False, drained=False)
3673
3674   def Exec(self, feedback_fn):
3675     """Adds the new node to the cluster.
3676
3677     """
3678     new_node = self.new_node
3679     node = new_node.name
3680
3681     # for re-adds, reset the offline/drained/master-candidate flags;
3682     # we need to reset here, otherwise offline would prevent RPC calls
3683     # later in the procedure; this also means that if the re-add
3684     # fails, we are left with a non-offlined, broken node
3685     if self.op.readd:
3686       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3687       self.LogInfo("Readding a node, the offline/drained flags were reset")
3688       # if we demote the node, we do cleanup later in the procedure
3689       new_node.master_candidate = self.master_candidate
3690       if self.changed_primary_ip:
3691         new_node.primary_ip = self.op.primary_ip
3692
3693     # notify the user about any possible mc promotion
3694     if new_node.master_candidate:
3695       self.LogInfo("Node will be a master candidate")
3696
3697     # check connectivity
3698     result = self.rpc.call_version([node])[node]
3699     result.Raise("Can't get version information from node %s" % node)
3700     if constants.PROTOCOL_VERSION == result.payload:
3701       logging.info("Communication to node %s fine, sw version %s match",
3702                    node, result.payload)
3703     else:
3704       raise errors.OpExecError("Version mismatch master version %s,"
3705                                " node version %s" %
3706                                (constants.PROTOCOL_VERSION, result.payload))
3707
3708     # setup ssh on node
3709     if self.cfg.GetClusterInfo().modify_ssh_setup:
3710       logging.info("Copy ssh key to node %s", node)
3711       priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3712       keyarray = []
3713       keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3714                   constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3715                   priv_key, pub_key]
3716
3717       for i in keyfiles:
3718         keyarray.append(utils.ReadFile(i))
3719
3720       result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3721                                       keyarray[2], keyarray[3], keyarray[4],
3722                                       keyarray[5])
3723       result.Raise("Cannot transfer ssh keys to the new node")
3724
3725     # Add node to our /etc/hosts, and add key to known_hosts
3726     if self.cfg.GetClusterInfo().modify_etc_hosts:
3727       # FIXME: this should be done via an rpc call to node daemon
3728       utils.AddHostToEtcHosts(new_node.name)
3729
3730     if new_node.secondary_ip != new_node.primary_ip:
3731       result = self.rpc.call_node_has_ip_address(new_node.name,
3732                                                  new_node.secondary_ip)
3733       result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3734                    prereq=True, ecode=errors.ECODE_ENVIRON)
3735       if not result.payload:
3736         raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3737                                  " you gave (%s). Please fix and re-run this"
3738                                  " command." % new_node.secondary_ip)
3739
3740     node_verify_list = [self.cfg.GetMasterNode()]
3741     node_verify_param = {
3742       constants.NV_NODELIST: [node],
3743       # TODO: do a node-net-test as well?
3744     }
3745
3746     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3747                                        self.cfg.GetClusterName())
3748     for verifier in node_verify_list:
3749       result[verifier].Raise("Cannot communicate with node %s" % verifier)
3750       nl_payload = result[verifier].payload[constants.NV_NODELIST]
3751       if nl_payload:
3752         for failed in nl_payload:
3753           feedback_fn("ssh/hostname verification failed"
3754                       " (checking from %s): %s" %
3755                       (verifier, nl_payload[failed]))
3756         raise errors.OpExecError("ssh/hostname verification failed.")
3757
3758     if self.op.readd:
3759       _RedistributeAncillaryFiles(self)
3760       self.context.ReaddNode(new_node)
3761       # make sure we redistribute the config
3762       self.cfg.Update(new_node, feedback_fn)
3763       # and make sure the new node will not have old files around
3764       if not new_node.master_candidate:
3765         result = self.rpc.call_node_demote_from_mc(new_node.name)
3766         msg = result.fail_msg
3767         if msg:
3768           self.LogWarning("Node failed to demote itself from master"
3769                           " candidate status: %s" % msg)
3770     else:
3771       _RedistributeAncillaryFiles(self, additional_nodes=[node])
3772       self.context.AddNode(new_node, self.proc.GetECId())
3773
3774
3775 class LUSetNodeParams(LogicalUnit):
3776   """Modifies the parameters of a node.
3777
3778   """
3779   HPATH = "node-modify"
3780   HTYPE = constants.HTYPE_NODE
3781   _OP_REQP = [("node_name", _TNonEmptyString)]
3782   REQ_BGL = False
3783
3784   def CheckArguments(self):
3785     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3786     _CheckBooleanOpField(self.op, 'master_candidate')
3787     _CheckBooleanOpField(self.op, 'offline')
3788     _CheckBooleanOpField(self.op, 'drained')
3789     _CheckBooleanOpField(self.op, 'auto_promote')
3790     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3791     if all_mods.count(None) == 3:
3792       raise errors.OpPrereqError("Please pass at least one modification",
3793                                  errors.ECODE_INVAL)
3794     if all_mods.count(True) > 1:
3795       raise errors.OpPrereqError("Can't set the node into more than one"
3796                                  " state at the same time",
3797                                  errors.ECODE_INVAL)
3798
3799     # Boolean value that tells us whether we're offlining or draining the node
3800     self.offline_or_drain = (self.op.offline == True or
3801                              self.op.drained == True)
3802     self.deoffline_or_drain = (self.op.offline == False or
3803                                self.op.drained == False)
3804     self.might_demote = (self.op.master_candidate == False or
3805                          self.offline_or_drain)
3806
3807     self.lock_all = self.op.auto_promote and self.might_demote
3808
3809
3810   def ExpandNames(self):
3811     if self.lock_all:
3812       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3813     else:
3814       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3815
3816   def BuildHooksEnv(self):
3817     """Build hooks env.
3818
3819     This runs on the master node.
3820
3821     """
3822     env = {
3823       "OP_TARGET": self.op.node_name,
3824       "MASTER_CANDIDATE": str(self.op.master_candidate),
3825       "OFFLINE": str(self.op.offline),
3826       "DRAINED": str(self.op.drained),
3827       }
3828     nl = [self.cfg.GetMasterNode(),
3829           self.op.node_name]
3830     return env, nl, nl
3831
3832   def CheckPrereq(self):
3833     """Check prerequisites.
3834
3835     This only checks the instance list against the existing names.
3836
3837     """
3838     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3839
3840     if (self.op.master_candidate is not None or
3841         self.op.drained is not None or
3842         self.op.offline is not None):
3843       # we can't change the master's node flags
3844       if self.op.node_name == self.cfg.GetMasterNode():
3845         raise errors.OpPrereqError("The master role can be changed"
3846                                    " only via masterfailover",
3847                                    errors.ECODE_INVAL)
3848
3849
3850     if node.master_candidate and self.might_demote and not self.lock_all:
3851       assert not self.op.auto_promote, "auto-promote set but lock_all not"
3852       # check if after removing the current node, we're missing master
3853       # candidates
3854       (mc_remaining, mc_should, _) = \
3855           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3856       if mc_remaining < mc_should:
3857         raise errors.OpPrereqError("Not enough master candidates, please"
3858                                    " pass auto_promote to allow promotion",
3859                                    errors.ECODE_INVAL)
3860
3861     if (self.op.master_candidate == True and
3862         ((node.offline and not self.op.offline == False) or
3863          (node.drained and not self.op.drained == False))):
3864       raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3865                                  " to master_candidate" % node.name,
3866                                  errors.ECODE_INVAL)
3867
3868     # If we're being deofflined/drained, we'll MC ourself if needed
3869     if (self.deoffline_or_drain and not self.offline_or_drain and not
3870         self.op.master_candidate == True and not node.master_candidate):
3871       self.op.master_candidate = _DecideSelfPromotion(self)
3872       if self.op.master_candidate:
3873         self.LogInfo("Autopromoting node to master candidate")
3874
3875     return
3876
3877   def Exec(self, feedback_fn):
3878     """Modifies a node.
3879
3880     """
3881     node = self.node
3882
3883     result = []
3884     changed_mc = False
3885
3886     if self.op.offline is not None:
3887       node.offline = self.op.offline
3888       result.append(("offline", str(self.op.offline)))
3889       if self.op.offline == True:
3890         if node.master_candidate:
3891           node.master_candidate = False
3892           changed_mc = True
3893           result.append(("master_candidate", "auto-demotion due to offline"))
3894         if node.drained:
3895           node.drained = False
3896           result.append(("drained", "clear drained status due to offline"))
3897
3898     if self.op.master_candidate is not None:
3899       node.master_candidate = self.op.master_candidate
3900       changed_mc = True
3901       result.append(("master_candidate", str(self.op.master_candidate)))
3902       if self.op.master_candidate == False:
3903         rrc = self.rpc.call_node_demote_from_mc(node.name)
3904         msg = rrc.fail_msg
3905         if msg:
3906           self.LogWarning("Node failed to demote itself: %s" % msg)
3907
3908     if self.op.drained is not None:
3909       node.drained = self.op.drained
3910       result.append(("drained", str(self.op.drained)))
3911       if self.op.drained == True:
3912         if node.master_candidate:
3913           node.master_candidate = False
3914           changed_mc = True
3915           result.append(("master_candidate", "auto-demotion due to drain"))
3916           rrc = self.rpc.call_node_demote_from_mc(node.name)
3917           msg = rrc.fail_msg
3918           if msg:
3919             self.LogWarning("Node failed to demote itself: %s" % msg)
3920         if node.offline:
3921           node.offline = False
3922           result.append(("offline", "clear offline status due to drain"))
3923
3924     # we locked all nodes, we adjust the CP before updating this node
3925     if self.lock_all:
3926       _AdjustCandidatePool(self, [node.name])
3927
3928     # this will trigger configuration file update, if needed
3929     self.cfg.Update(node, feedback_fn)
3930
3931     # this will trigger job queue propagation or cleanup
3932     if changed_mc:
3933       self.context.ReaddNode(node)
3934
3935     return result
3936
3937
3938 class LUPowercycleNode(NoHooksLU):
3939   """Powercycles a node.
3940
3941   """
3942   _OP_REQP = [
3943     ("node_name", _TNonEmptyString),
3944     ("force", _TBool),
3945     ]
3946   REQ_BGL = False
3947
3948   def CheckArguments(self):
3949     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3950     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3951       raise errors.OpPrereqError("The node is the master and the force"
3952                                  " parameter was not set",
3953                                  errors.ECODE_INVAL)
3954
3955   def ExpandNames(self):
3956     """Locking for PowercycleNode.
3957
3958     This is a last-resort option and shouldn't block on other
3959     jobs. Therefore, we grab no locks.
3960
3961     """
3962     self.needed_locks = {}
3963
3964   def Exec(self, feedback_fn):
3965     """Reboots a node.
3966
3967     """
3968     result = self.rpc.call_node_powercycle(self.op.node_name,
3969                                            self.cfg.GetHypervisorType())
3970     result.Raise("Failed to schedule the reboot")
3971     return result.payload
3972
3973
3974 class LUQueryClusterInfo(NoHooksLU):
3975   """Query cluster configuration.
3976
3977   """
3978   _OP_REQP = []
3979   REQ_BGL = False
3980
3981   def ExpandNames(self):
3982     self.needed_locks = {}
3983
3984   def Exec(self, feedback_fn):
3985     """Return cluster config.
3986
3987     """
3988     cluster = self.cfg.GetClusterInfo()
3989     os_hvp = {}
3990
3991     # Filter just for enabled hypervisors
3992     for os_name, hv_dict in cluster.os_hvp.items():
3993       os_hvp[os_name] = {}
3994       for hv_name, hv_params in hv_dict.items():
3995         if hv_name in cluster.enabled_hypervisors:
3996           os_hvp[os_name][hv_name] = hv_params
3997
3998     result = {
3999       "software_version": constants.RELEASE_VERSION,
4000       "protocol_version": constants.PROTOCOL_VERSION,
4001       "config_version": constants.CONFIG_VERSION,
4002       "os_api_version": max(constants.OS_API_VERSIONS),
4003       "export_version": constants.EXPORT_VERSION,
4004       "architecture": (platform.architecture()[0], platform.machine()),
4005       "name": cluster.cluster_name,
4006       "master": cluster.master_node,
4007       "default_hypervisor": cluster.enabled_hypervisors[0],
4008       "enabled_hypervisors": cluster.enabled_hypervisors,
4009       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4010                         for hypervisor_name in cluster.enabled_hypervisors]),
4011       "os_hvp": os_hvp,
4012       "beparams": cluster.beparams,
4013       "osparams": cluster.osparams,
4014       "nicparams": cluster.nicparams,
4015       "candidate_pool_size": cluster.candidate_pool_size,
4016       "master_netdev": cluster.master_netdev,
4017       "volume_group_name": cluster.volume_group_name,
4018       "file_storage_dir": cluster.file_storage_dir,
4019       "maintain_node_health": cluster.maintain_node_health,
4020       "ctime": cluster.ctime,
4021       "mtime": cluster.mtime,
4022       "uuid": cluster.uuid,
4023       "tags": list(cluster.GetTags()),
4024       "uid_pool": cluster.uid_pool,
4025       }
4026
4027     return result
4028
4029
4030 class LUQueryConfigValues(NoHooksLU):
4031   """Return configuration values.
4032
4033   """
4034   _OP_REQP = []
4035   REQ_BGL = False
4036   _FIELDS_DYNAMIC = utils.FieldSet()
4037   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4038                                   "watcher_pause")
4039
4040   def CheckArguments(self):
4041     _CheckOutputFields(static=self._FIELDS_STATIC,
4042                        dynamic=self._FIELDS_DYNAMIC,
4043                        selected=self.op.output_fields)
4044
4045   def ExpandNames(self):
4046     self.needed_locks = {}
4047
4048   def Exec(self, feedback_fn):
4049     """Dump a representation of the cluster config to the standard output.
4050
4051     """
4052     values = []
4053     for field in self.op.output_fields:
4054       if field == "cluster_name":
4055         entry = self.cfg.GetClusterName()
4056       elif field == "master_node":
4057         entry = self.cfg.GetMasterNode()
4058       elif field == "drain_flag":
4059         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4060       elif field == "watcher_pause":
4061         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4062       else:
4063         raise errors.ParameterError(field)
4064       values.append(entry)
4065     return values
4066
4067
4068 class LUActivateInstanceDisks(NoHooksLU):
4069   """Bring up an instance's disks.
4070
4071   """
4072   _OP_REQP = [("instance_name", _TNonEmptyString)]
4073   _OP_DEFS = [("ignore_size", False)]
4074   REQ_BGL = False
4075
4076   def ExpandNames(self):
4077     self._ExpandAndLockInstance()
4078     self.needed_locks[locking.LEVEL_NODE] = []
4079     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4080
4081   def DeclareLocks(self, level):
4082     if level == locking.LEVEL_NODE:
4083       self._LockInstancesNodes()
4084
4085   def CheckPrereq(self):
4086     """Check prerequisites.
4087
4088     This checks that the instance is in the cluster.
4089
4090     """
4091     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4092     assert self.instance is not None, \
4093       "Cannot retrieve locked instance %s" % self.op.instance_name
4094     _CheckNodeOnline(self, self.instance.primary_node)
4095
4096   def Exec(self, feedback_fn):
4097     """Activate the disks.
4098
4099     """
4100     disks_ok, disks_info = \
4101               _AssembleInstanceDisks(self, self.instance,
4102                                      ignore_size=self.op.ignore_size)
4103     if not disks_ok:
4104       raise errors.OpExecError("Cannot activate block devices")
4105
4106     return disks_info
4107
4108
4109 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4110                            ignore_size=False):
4111   """Prepare the block devices for an instance.
4112
4113   This sets up the block devices on all nodes.
4114
4115   @type lu: L{LogicalUnit}
4116   @param lu: the logical unit on whose behalf we execute
4117   @type instance: L{objects.Instance}
4118   @param instance: the instance for whose disks we assemble
4119   @type disks: list of L{objects.Disk} or None
4120   @param disks: which disks to assemble (or all, if None)
4121   @type ignore_secondaries: boolean
4122   @param ignore_secondaries: if true, errors on secondary nodes
4123       won't result in an error return from the function
4124   @type ignore_size: boolean
4125   @param ignore_size: if true, the current known size of the disk
4126       will not be used during the disk activation, useful for cases
4127       when the size is wrong
4128   @return: False if the operation failed, otherwise a list of
4129       (host, instance_visible_name, node_visible_name)
4130       with the mapping from node devices to instance devices
4131
4132   """
4133   device_info = []
4134   disks_ok = True
4135   iname = instance.name
4136   disks = _ExpandCheckDisks(instance, disks)
4137
4138   # With the two passes mechanism we try to reduce the window of
4139   # opportunity for the race condition of switching DRBD to primary
4140   # before handshaking occured, but we do not eliminate it
4141
4142   # The proper fix would be to wait (with some limits) until the
4143   # connection has been made and drbd transitions from WFConnection
4144   # into any other network-connected state (Connected, SyncTarget,
4145   # SyncSource, etc.)
4146
4147   # 1st pass, assemble on all nodes in secondary mode
4148   for inst_disk in disks:
4149     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4150       if ignore_size:
4151         node_disk = node_disk.Copy()
4152         node_disk.UnsetSize()
4153       lu.cfg.SetDiskID(node_disk, node)
4154       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4155       msg = result.fail_msg
4156       if msg:
4157         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4158                            " (is_primary=False, pass=1): %s",
4159                            inst_disk.iv_name, node, msg)
4160         if not ignore_secondaries:
4161           disks_ok = False
4162
4163   # FIXME: race condition on drbd migration to primary
4164
4165   # 2nd pass, do only the primary node
4166   for inst_disk in disks:
4167     dev_path = None
4168
4169     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4170       if node != instance.primary_node:
4171         continue
4172       if ignore_size:
4173         node_disk = node_disk.Copy()
4174         node_disk.UnsetSize()
4175       lu.cfg.SetDiskID(node_disk, node)
4176       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4177       msg = result.fail_msg
4178       if msg:
4179         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4180                            " (is_primary=True, pass=2): %s",
4181                            inst_disk.iv_name, node, msg)
4182         disks_ok = False
4183       else:
4184         dev_path = result.payload
4185
4186     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4187
4188   # leave the disks configured for the primary node
4189   # this is a workaround that would be fixed better by
4190   # improving the logical/physical id handling
4191   for disk in disks:
4192     lu.cfg.SetDiskID(disk, instance.primary_node)
4193
4194   return disks_ok, device_info
4195
4196
4197 def _StartInstanceDisks(lu, instance, force):
4198   """Start the disks of an instance.
4199
4200   """
4201   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4202                                            ignore_secondaries=force)
4203   if not disks_ok:
4204     _ShutdownInstanceDisks(lu, instance)
4205     if force is not None and not force:
4206       lu.proc.LogWarning("", hint="If the message above refers to a"
4207                          " secondary node,"
4208                          " you can retry the operation using '--force'.")
4209     raise errors.OpExecError("Disk consistency error")
4210
4211
4212 class LUDeactivateInstanceDisks(NoHooksLU):
4213   """Shutdown an instance's disks.
4214
4215   """
4216   _OP_REQP = [("instance_name", _TNonEmptyString)]
4217   REQ_BGL = False
4218
4219   def ExpandNames(self):
4220     self._ExpandAndLockInstance()
4221     self.needed_locks[locking.LEVEL_NODE] = []
4222     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4223
4224   def DeclareLocks(self, level):
4225     if level == locking.LEVEL_NODE:
4226       self._LockInstancesNodes()
4227
4228   def CheckPrereq(self):
4229     """Check prerequisites.
4230
4231     This checks that the instance is in the cluster.
4232
4233     """
4234     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4235     assert self.instance is not None, \
4236       "Cannot retrieve locked instance %s" % self.op.instance_name
4237
4238   def Exec(self, feedback_fn):
4239     """Deactivate the disks
4240
4241     """
4242     instance = self.instance
4243     _SafeShutdownInstanceDisks(self, instance)
4244
4245
4246 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4247   """Shutdown block devices of an instance.
4248
4249   This function checks if an instance is running, before calling
4250   _ShutdownInstanceDisks.
4251
4252   """
4253   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4254   _ShutdownInstanceDisks(lu, instance, disks=disks)
4255
4256
4257 def _ExpandCheckDisks(instance, disks):
4258   """Return the instance disks selected by the disks list
4259
4260   @type disks: list of L{objects.Disk} or None
4261   @param disks: selected disks
4262   @rtype: list of L{objects.Disk}
4263   @return: selected instance disks to act on
4264
4265   """
4266   if disks is None:
4267     return instance.disks
4268   else:
4269     if not set(disks).issubset(instance.disks):
4270       raise errors.ProgrammerError("Can only act on disks belonging to the"
4271                                    " target instance")
4272     return disks
4273
4274
4275 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4276   """Shutdown block devices of an instance.
4277
4278   This does the shutdown on all nodes of the instance.
4279
4280   If the ignore_primary is false, errors on the primary node are
4281   ignored.
4282
4283   """
4284   all_result = True
4285   disks = _ExpandCheckDisks(instance, disks)
4286
4287   for disk in disks:
4288     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4289       lu.cfg.SetDiskID(top_disk, node)
4290       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4291       msg = result.fail_msg
4292       if msg:
4293         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4294                       disk.iv_name, node, msg)
4295         if not ignore_primary or node != instance.primary_node:
4296           all_result = False
4297   return all_result
4298
4299
4300 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4301   """Checks if a node has enough free memory.
4302
4303   This function check if a given node has the needed amount of free
4304   memory. In case the node has less memory or we cannot get the
4305   information from the node, this function raise an OpPrereqError
4306   exception.
4307
4308   @type lu: C{LogicalUnit}
4309   @param lu: a logical unit from which we get configuration data
4310   @type node: C{str}
4311   @param node: the node to check
4312   @type reason: C{str}
4313   @param reason: string to use in the error message
4314   @type requested: C{int}
4315   @param requested: the amount of memory in MiB to check for
4316   @type hypervisor_name: C{str}
4317   @param hypervisor_name: the hypervisor to ask for memory stats
4318   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4319       we cannot check the node
4320
4321   """
4322   nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4323   nodeinfo[node].Raise("Can't get data from node %s" % node,
4324                        prereq=True, ecode=errors.ECODE_ENVIRON)
4325   free_mem = nodeinfo[node].payload.get('memory_free', None)
4326   if not isinstance(free_mem, int):
4327     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4328                                " was '%s'" % (node, free_mem),
4329                                errors.ECODE_ENVIRON)
4330   if requested > free_mem:
4331     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4332                                " needed %s MiB, available %s MiB" %
4333                                (node, reason, requested, free_mem),
4334                                errors.ECODE_NORES)
4335
4336
4337 def _CheckNodesFreeDisk(lu, nodenames, requested):
4338   """Checks if nodes have enough free disk space in the default VG.
4339
4340   This function check if all given nodes have the needed amount of
4341   free disk. In case any node has less disk or we cannot get the
4342   information from the node, this function raise an OpPrereqError
4343   exception.
4344
4345   @type lu: C{LogicalUnit}
4346   @param lu: a logical unit from which we get configuration data
4347   @type nodenames: C{list}
4348   @param nodenames: the list of node names to check
4349   @type requested: C{int}
4350   @param requested: the amount of disk in MiB to check for
4351   @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4352       we cannot check the node
4353
4354   """
4355   nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4356                                    lu.cfg.GetHypervisorType())
4357   for node in nodenames:
4358     info = nodeinfo[node]
4359     info.Raise("Cannot get current information from node %s" % node,
4360                prereq=True, ecode=errors.ECODE_ENVIRON)
4361     vg_free = info.payload.get("vg_free", None)
4362     if not isinstance(vg_free, int):
4363       raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4364                                  " result was '%s'" % (node, vg_free),
4365                                  errors.ECODE_ENVIRON)
4366     if requested > vg_free:
4367       raise errors.OpPrereqError("Not enough disk space on target node %s:"
4368                                  " required %d MiB, available %d MiB" %
4369                                  (node, requested, vg_free),
4370                                  errors.ECODE_NORES)
4371
4372
4373 class LUStartupInstance(LogicalUnit):
4374   """Starts an instance.
4375
4376   """
4377   HPATH = "instance-start"
4378   HTYPE = constants.HTYPE_INSTANCE
4379   _OP_REQP = [
4380     ("instance_name", _TNonEmptyString),
4381     ("force", _TBool),
4382     ("beparams", _TDict),
4383     ("hvparams", _TDict),
4384     ]
4385   _OP_DEFS = [
4386     ("beparams", _EmptyDict),
4387     ("hvparams", _EmptyDict),
4388     ]
4389   REQ_BGL = False
4390
4391   def CheckArguments(self):
4392     # extra beparams
4393     if self.op.beparams:
4394       # fill the beparams dict
4395       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4396
4397   def ExpandNames(self):
4398     self._ExpandAndLockInstance()
4399
4400   def BuildHooksEnv(self):
4401     """Build hooks env.
4402
4403     This runs on master, primary and secondary nodes of the instance.
4404
4405     """
4406     env = {
4407       "FORCE": self.op.force,
4408       }
4409     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4410     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4411     return env, nl, nl
4412
4413   def CheckPrereq(self):
4414     """Check prerequisites.
4415
4416     This checks that the instance is in the cluster.
4417
4418     """
4419     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4420     assert self.instance is not None, \
4421       "Cannot retrieve locked instance %s" % self.op.instance_name
4422
4423     # extra hvparams
4424     if self.op.hvparams:
4425       # check hypervisor parameter syntax (locally)
4426       cluster = self.cfg.GetClusterInfo()
4427       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4428       filled_hvp = cluster.FillHV(instance)
4429       filled_hvp.update(self.op.hvparams)
4430       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4431       hv_type.CheckParameterSyntax(filled_hvp)
4432       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4433
4434     _CheckNodeOnline(self, instance.primary_node)
4435
4436     bep = self.cfg.GetClusterInfo().FillBE(instance)
4437     # check bridges existence
4438     _CheckInstanceBridgesExist(self, instance)
4439
4440     remote_info = self.rpc.call_instance_info(instance.primary_node,
4441                                               instance.name,
4442                                               instance.hypervisor)
4443     remote_info.Raise("Error checking node %s" % instance.primary_node,
4444                       prereq=True, ecode=errors.ECODE_ENVIRON)
4445     if not remote_info.payload: # not running already
4446       _CheckNodeFreeMemory(self, instance.primary_node,
4447                            "starting instance %s" % instance.name,
4448                            bep[constants.BE_MEMORY], instance.hypervisor)
4449
4450   def Exec(self, feedback_fn):
4451     """Start the instance.
4452
4453     """
4454     instance = self.instance
4455     force = self.op.force
4456
4457     self.cfg.MarkInstanceUp(instance.name)
4458
4459     node_current = instance.primary_node
4460
4461     _StartInstanceDisks(self, instance, force)
4462
4463     result = self.rpc.call_instance_start(node_current, instance,
4464                                           self.op.hvparams, self.op.beparams)
4465     msg = result.fail_msg
4466     if msg:
4467       _ShutdownInstanceDisks(self, instance)
4468       raise errors.OpExecError("Could not start instance: %s" % msg)
4469
4470
4471 class LURebootInstance(LogicalUnit):
4472   """Reboot an instance.
4473
4474   """
4475   HPATH = "instance-reboot"
4476   HTYPE = constants.HTYPE_INSTANCE
4477   _OP_REQP = [
4478     ("instance_name", _TNonEmptyString),
4479     ("ignore_secondaries", _TBool),
4480     ("reboot_type", _TElemOf(constants.REBOOT_TYPES)),
4481     ]
4482   _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4483   REQ_BGL = False
4484
4485   def ExpandNames(self):
4486     self._ExpandAndLockInstance()
4487
4488   def BuildHooksEnv(self):
4489     """Build hooks env.
4490
4491     This runs on master, primary and secondary nodes of the instance.
4492
4493     """
4494     env = {
4495       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4496       "REBOOT_TYPE": self.op.reboot_type,
4497       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4498       }
4499     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4500     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4501     return env, nl, nl
4502
4503   def CheckPrereq(self):
4504     """Check prerequisites.
4505
4506     This checks that the instance is in the cluster.
4507
4508     """
4509     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4510     assert self.instance is not None, \
4511       "Cannot retrieve locked instance %s" % self.op.instance_name
4512
4513     _CheckNodeOnline(self, instance.primary_node)
4514
4515     # check bridges existence
4516     _CheckInstanceBridgesExist(self, instance)
4517
4518   def Exec(self, feedback_fn):
4519     """Reboot the instance.
4520
4521     """
4522     instance = self.instance
4523     ignore_secondaries = self.op.ignore_secondaries
4524     reboot_type = self.op.reboot_type
4525
4526     node_current = instance.primary_node
4527
4528     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4529                        constants.INSTANCE_REBOOT_HARD]:
4530       for disk in instance.disks:
4531         self.cfg.SetDiskID(disk, node_current)
4532       result = self.rpc.call_instance_reboot(node_current, instance,
4533                                              reboot_type,
4534                                              self.op.shutdown_timeout)
4535       result.Raise("Could not reboot instance")
4536     else:
4537       result = self.rpc.call_instance_shutdown(node_current, instance,
4538                                                self.op.shutdown_timeout)
4539       result.Raise("Could not shutdown instance for full reboot")
4540       _ShutdownInstanceDisks(self, instance)
4541       _StartInstanceDisks(self, instance, ignore_secondaries)
4542       result = self.rpc.call_instance_start(node_current, instance, None, None)
4543       msg = result.fail_msg
4544       if msg:
4545         _ShutdownInstanceDisks(self, instance)
4546         raise errors.OpExecError("Could not start instance for"
4547                                  " full reboot: %s" % msg)
4548
4549     self.cfg.MarkInstanceUp(instance.name)
4550
4551
4552 class LUShutdownInstance(LogicalUnit):
4553   """Shutdown an instance.
4554
4555   """
4556   HPATH = "instance-stop"
4557   HTYPE = constants.HTYPE_INSTANCE
4558   _OP_REQP = [("instance_name", _TNonEmptyString)]
4559   _OP_DEFS = [("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4560   REQ_BGL = False
4561
4562   def ExpandNames(self):
4563     self._ExpandAndLockInstance()
4564
4565   def BuildHooksEnv(self):
4566     """Build hooks env.
4567
4568     This runs on master, primary and secondary nodes of the instance.
4569
4570     """
4571     env = _BuildInstanceHookEnvByObject(self, self.instance)
4572     env["TIMEOUT"] = self.op.timeout
4573     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4574     return env, nl, nl
4575
4576   def CheckPrereq(self):
4577     """Check prerequisites.
4578
4579     This checks that the instance is in the cluster.
4580
4581     """
4582     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4583     assert self.instance is not None, \
4584       "Cannot retrieve locked instance %s" % self.op.instance_name
4585     _CheckNodeOnline(self, self.instance.primary_node)
4586
4587   def Exec(self, feedback_fn):
4588     """Shutdown the instance.
4589
4590     """
4591     instance = self.instance
4592     node_current = instance.primary_node
4593     timeout = self.op.timeout
4594     self.cfg.MarkInstanceDown(instance.name)
4595     result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4596     msg = result.fail_msg
4597     if msg:
4598       self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4599
4600     _ShutdownInstanceDisks(self, instance)
4601
4602
4603 class LUReinstallInstance(LogicalUnit):
4604   """Reinstall an instance.
4605
4606   """
4607   HPATH = "instance-reinstall"
4608   HTYPE = constants.HTYPE_INSTANCE
4609   _OP_REQP = [("instance_name", _TNonEmptyString)]
4610   _OP_DEFS = [
4611     ("os_type", None),
4612     ("force_variant", False),
4613     ]
4614   REQ_BGL = False
4615
4616   def ExpandNames(self):
4617     self._ExpandAndLockInstance()
4618
4619   def BuildHooksEnv(self):
4620     """Build hooks env.
4621
4622     This runs on master, primary and secondary nodes of the instance.
4623
4624     """
4625     env = _BuildInstanceHookEnvByObject(self, self.instance)
4626     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4627     return env, nl, nl
4628
4629   def CheckPrereq(self):
4630     """Check prerequisites.
4631
4632     This checks that the instance is in the cluster and is not running.
4633
4634     """
4635     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4636     assert instance is not None, \
4637       "Cannot retrieve locked instance %s" % self.op.instance_name
4638     _CheckNodeOnline(self, instance.primary_node)
4639
4640     if instance.disk_template == constants.DT_DISKLESS:
4641       raise errors.OpPrereqError("Instance '%s' has no disks" %
4642                                  self.op.instance_name,
4643                                  errors.ECODE_INVAL)
4644     _CheckInstanceDown(self, instance, "cannot reinstall")
4645
4646     if self.op.os_type is not None:
4647       # OS verification
4648       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4649       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4650
4651     self.instance = instance
4652
4653   def Exec(self, feedback_fn):
4654     """Reinstall the instance.
4655
4656     """
4657     inst = self.instance
4658
4659     if self.op.os_type is not None:
4660       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4661       inst.os = self.op.os_type
4662       self.cfg.Update(inst, feedback_fn)
4663
4664     _StartInstanceDisks(self, inst, None)
4665     try:
4666       feedback_fn("Running the instance OS create scripts...")
4667       # FIXME: pass debug option from opcode to backend
4668       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4669                                              self.op.debug_level)
4670       result.Raise("Could not install OS for instance %s on node %s" %
4671                    (inst.name, inst.primary_node))
4672     finally:
4673       _ShutdownInstanceDisks(self, inst)
4674
4675
4676 class LURecreateInstanceDisks(LogicalUnit):
4677   """Recreate an instance's missing disks.
4678
4679   """
4680   HPATH = "instance-recreate-disks"
4681   HTYPE = constants.HTYPE_INSTANCE
4682   _OP_REQP = [
4683     ("instance_name", _TNonEmptyString),
4684     ("disks", _TListOf(_TPositiveInt)),
4685     ]
4686   REQ_BGL = False
4687
4688   def ExpandNames(self):
4689     self._ExpandAndLockInstance()
4690
4691   def BuildHooksEnv(self):
4692     """Build hooks env.
4693
4694     This runs on master, primary and secondary nodes of the instance.
4695
4696     """
4697     env = _BuildInstanceHookEnvByObject(self, self.instance)
4698     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4699     return env, nl, nl
4700
4701   def CheckPrereq(self):
4702     """Check prerequisites.
4703
4704     This checks that the instance is in the cluster and is not running.
4705
4706     """
4707     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4708     assert instance is not None, \
4709       "Cannot retrieve locked instance %s" % self.op.instance_name
4710     _CheckNodeOnline(self, instance.primary_node)
4711
4712     if instance.disk_template == constants.DT_DISKLESS:
4713       raise errors.OpPrereqError("Instance '%s' has no disks" %
4714                                  self.op.instance_name, errors.ECODE_INVAL)
4715     _CheckInstanceDown(self, instance, "cannot recreate disks")
4716
4717     if not self.op.disks:
4718       self.op.disks = range(len(instance.disks))
4719     else:
4720       for idx in self.op.disks:
4721         if idx >= len(instance.disks):
4722           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4723                                      errors.ECODE_INVAL)
4724
4725     self.instance = instance
4726
4727   def Exec(self, feedback_fn):
4728     """Recreate the disks.
4729
4730     """
4731     to_skip = []
4732     for idx, _ in enumerate(self.instance.disks):
4733       if idx not in self.op.disks: # disk idx has not been passed in
4734         to_skip.append(idx)
4735         continue
4736
4737     _CreateDisks(self, self.instance, to_skip=to_skip)
4738
4739
4740 class LURenameInstance(LogicalUnit):
4741   """Rename an instance.
4742
4743   """
4744   HPATH = "instance-rename"
4745   HTYPE = constants.HTYPE_INSTANCE
4746   _OP_REQP = [
4747     ("instance_name", _TNonEmptyString),
4748     ("new_name", _TNonEmptyString),
4749     ]
4750   _OP_DEFS = [("ignore_ip", False)]
4751
4752   def BuildHooksEnv(self):
4753     """Build hooks env.
4754
4755     This runs on master, primary and secondary nodes of the instance.
4756
4757     """
4758     env = _BuildInstanceHookEnvByObject(self, self.instance)
4759     env["INSTANCE_NEW_NAME"] = self.op.new_name
4760     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4761     return env, nl, nl
4762
4763   def CheckPrereq(self):
4764     """Check prerequisites.
4765
4766     This checks that the instance is in the cluster and is not running.
4767
4768     """
4769     self.op.instance_name = _ExpandInstanceName(self.cfg,
4770                                                 self.op.instance_name)
4771     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4772     assert instance is not None
4773     _CheckNodeOnline(self, instance.primary_node)
4774     _CheckInstanceDown(self, instance, "cannot rename")
4775     self.instance = instance
4776
4777     # new name verification
4778     name_info = utils.GetHostInfo(self.op.new_name)
4779
4780     self.op.new_name = new_name = name_info.name
4781     instance_list = self.cfg.GetInstanceList()
4782     if new_name in instance_list:
4783       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4784                                  new_name, errors.ECODE_EXISTS)
4785
4786     if not self.op.ignore_ip:
4787       if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4788         raise errors.OpPrereqError("IP %s of instance %s already in use" %
4789                                    (name_info.ip, new_name),
4790                                    errors.ECODE_NOTUNIQUE)
4791
4792   def Exec(self, feedback_fn):
4793     """Reinstall the instance.
4794
4795     """
4796     inst = self.instance
4797     old_name = inst.name
4798
4799     if inst.disk_template == constants.DT_FILE:
4800       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4801
4802     self.cfg.RenameInstance(inst.name, self.op.new_name)
4803     # Change the instance lock. This is definitely safe while we hold the BGL
4804     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4805     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4806
4807     # re-read the instance from the configuration after rename
4808     inst = self.cfg.GetInstanceInfo(self.op.new_name)
4809
4810     if inst.disk_template == constants.DT_FILE:
4811       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4812       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4813                                                      old_file_storage_dir,
4814                                                      new_file_storage_dir)
4815       result.Raise("Could not rename on node %s directory '%s' to '%s'"
4816                    " (but the instance has been renamed in Ganeti)" %
4817                    (inst.primary_node, old_file_storage_dir,
4818                     new_file_storage_dir))
4819
4820     _StartInstanceDisks(self, inst, None)
4821     try:
4822       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4823                                                  old_name, self.op.debug_level)
4824       msg = result.fail_msg
4825       if msg:
4826         msg = ("Could not run OS rename script for instance %s on node %s"
4827                " (but the instance has been renamed in Ganeti): %s" %
4828                (inst.name, inst.primary_node, msg))
4829         self.proc.LogWarning(msg)
4830     finally:
4831       _ShutdownInstanceDisks(self, inst)
4832
4833
4834 class LURemoveInstance(LogicalUnit):
4835   """Remove an instance.
4836
4837   """
4838   HPATH = "instance-remove"
4839   HTYPE = constants.HTYPE_INSTANCE
4840   _OP_REQP = [
4841     ("instance_name", _TNonEmptyString),
4842     ("ignore_failures", _TBool),
4843     ]
4844   _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4845   REQ_BGL = False
4846
4847   def ExpandNames(self):
4848     self._ExpandAndLockInstance()
4849     self.needed_locks[locking.LEVEL_NODE] = []
4850     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4851
4852   def DeclareLocks(self, level):
4853     if level == locking.LEVEL_NODE:
4854       self._LockInstancesNodes()
4855
4856   def BuildHooksEnv(self):
4857     """Build hooks env.
4858
4859     This runs on master, primary and secondary nodes of the instance.
4860
4861     """
4862     env = _BuildInstanceHookEnvByObject(self, self.instance)
4863     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4864     nl = [self.cfg.GetMasterNode()]
4865     nl_post = list(self.instance.all_nodes) + nl
4866     return env, nl, nl_post
4867
4868   def CheckPrereq(self):
4869     """Check prerequisites.
4870
4871     This checks that the instance is in the cluster.
4872
4873     """
4874     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4875     assert self.instance is not None, \
4876       "Cannot retrieve locked instance %s" % self.op.instance_name
4877
4878   def Exec(self, feedback_fn):
4879     """Remove the instance.
4880
4881     """
4882     instance = self.instance
4883     logging.info("Shutting down instance %s on node %s",
4884                  instance.name, instance.primary_node)
4885
4886     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4887                                              self.op.shutdown_timeout)
4888     msg = result.fail_msg
4889     if msg:
4890       if self.op.ignore_failures:
4891         feedback_fn("Warning: can't shutdown instance: %s" % msg)
4892       else:
4893         raise errors.OpExecError("Could not shutdown instance %s on"
4894                                  " node %s: %s" %
4895                                  (instance.name, instance.primary_node, msg))
4896
4897     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4898
4899
4900 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4901   """Utility function to remove an instance.
4902
4903   """
4904   logging.info("Removing block devices for instance %s", instance.name)
4905
4906   if not _RemoveDisks(lu, instance):
4907     if not ignore_failures:
4908       raise errors.OpExecError("Can't remove instance's disks")
4909     feedback_fn("Warning: can't remove instance's disks")
4910
4911   logging.info("Removing instance %s out of cluster config", instance.name)
4912
4913   lu.cfg.RemoveInstance(instance.name)
4914
4915   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4916     "Instance lock removal conflict"
4917
4918   # Remove lock for the instance
4919   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4920
4921
4922 class LUQueryInstances(NoHooksLU):
4923   """Logical unit for querying instances.
4924
4925   """
4926   # pylint: disable-msg=W0142
4927   _OP_REQP = [
4928     ("output_fields", _TListOf(_TNonEmptyString)),
4929     ("names", _TListOf(_TNonEmptyString)),
4930     ("use_locking", _TBool),
4931     ]
4932   REQ_BGL = False
4933   _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4934                     "serial_no", "ctime", "mtime", "uuid"]
4935   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4936                                     "admin_state",
4937                                     "disk_template", "ip", "mac", "bridge",
4938                                     "nic_mode", "nic_link",
4939                                     "sda_size", "sdb_size", "vcpus", "tags",
4940                                     "network_port", "beparams",
4941                                     r"(disk)\.(size)/([0-9]+)",
4942                                     r"(disk)\.(sizes)", "disk_usage",
4943                                     r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4944                                     r"(nic)\.(bridge)/([0-9]+)",
4945                                     r"(nic)\.(macs|ips|modes|links|bridges)",
4946                                     r"(disk|nic)\.(count)",
4947                                     "hvparams",
4948                                     ] + _SIMPLE_FIELDS +
4949                                   ["hv/%s" % name
4950                                    for name in constants.HVS_PARAMETERS
4951                                    if name not in constants.HVC_GLOBALS] +
4952                                   ["be/%s" % name
4953                                    for name in constants.BES_PARAMETERS])
4954   _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4955
4956
4957   def CheckArguments(self):
4958     _CheckOutputFields(static=self._FIELDS_STATIC,
4959                        dynamic=self._FIELDS_DYNAMIC,
4960                        selected=self.op.output_fields)
4961
4962   def ExpandNames(self):
4963     self.needed_locks = {}
4964     self.share_locks[locking.LEVEL_INSTANCE] = 1
4965     self.share_locks[locking.LEVEL_NODE] = 1
4966
4967     if self.op.names:
4968       self.wanted = _GetWantedInstances(self, self.op.names)
4969     else:
4970       self.wanted = locking.ALL_SET
4971
4972     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4973     self.do_locking = self.do_node_query and self.op.use_locking
4974     if self.do_locking:
4975       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4976       self.needed_locks[locking.LEVEL_NODE] = []
4977       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4978
4979   def DeclareLocks(self, level):
4980     if level == locking.LEVEL_NODE and self.do_locking:
4981       self._LockInstancesNodes()
4982
4983   def Exec(self, feedback_fn):
4984     """Computes the list of nodes and their attributes.
4985
4986     """
4987     # pylint: disable-msg=R0912
4988     # way too many branches here
4989     all_info = self.cfg.GetAllInstancesInfo()
4990     if self.wanted == locking.ALL_SET:
4991       # caller didn't specify instance names, so ordering is not important
4992       if self.do_locking:
4993         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4994       else:
4995         instance_names = all_info.keys()
4996       instance_names = utils.NiceSort(instance_names)
4997     else:
4998       # caller did specify names, so we must keep the ordering
4999       if self.do_locking:
5000         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5001       else:
5002         tgt_set = all_info.keys()
5003       missing = set(self.wanted).difference(tgt_set)
5004       if missing:
5005         raise errors.OpExecError("Some instances were removed before"
5006                                  " retrieving their data: %s" % missing)
5007       instance_names = self.wanted
5008
5009     instance_list = [all_info[iname] for iname in instance_names]
5010
5011     # begin data gathering
5012
5013     nodes = frozenset([inst.primary_node for inst in instance_list])
5014     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5015
5016     bad_nodes = []
5017     off_nodes = []
5018     if self.do_node_query:
5019       live_data = {}
5020       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5021       for name in nodes:
5022         result = node_data[name]
5023         if result.offline:
5024           # offline nodes will be in both lists
5025           off_nodes.append(name)
5026         if result.fail_msg:
5027           bad_nodes.append(name)
5028         else:
5029           if result.payload:
5030             live_data.update(result.payload)
5031           # else no instance is alive
5032     else:
5033       live_data = dict([(name, {}) for name in instance_names])
5034
5035     # end data gathering
5036
5037     HVPREFIX = "hv/"
5038     BEPREFIX = "be/"
5039     output = []
5040     cluster = self.cfg.GetClusterInfo()
5041     for instance in instance_list:
5042       iout = []
5043       i_hv = cluster.FillHV(instance, skip_globals=True)
5044       i_be = cluster.FillBE(instance)
5045       i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5046       for field in self.op.output_fields:
5047         st_match = self._FIELDS_STATIC.Matches(field)
5048         if field in self._SIMPLE_FIELDS:
5049           val = getattr(instance, field)
5050         elif field == "pnode":
5051           val = instance.primary_node
5052         elif field == "snodes":
5053           val = list(instance.secondary_nodes)
5054         elif field == "admin_state":
5055           val = instance.admin_up
5056         elif field == "oper_state":
5057           if instance.primary_node in bad_nodes:
5058             val = None
5059           else:
5060             val = bool(live_data.get(instance.name))
5061         elif field == "status":
5062           if instance.primary_node in off_nodes:
5063             val = "ERROR_nodeoffline"
5064           elif instance.primary_node in bad_nodes:
5065             val = "ERROR_nodedown"
5066           else:
5067             running = bool(live_data.get(instance.name))
5068             if running:
5069               if instance.admin_up:
5070                 val = "running"
5071               else:
5072                 val = "ERROR_up"
5073             else:
5074               if instance.admin_up:
5075                 val = "ERROR_down"
5076               else:
5077                 val = "ADMIN_down"
5078         elif field == "oper_ram":
5079           if instance.primary_node in bad_nodes:
5080             val = None
5081           elif instance.name in live_data:
5082             val = live_data[instance.name].get("memory", "?")
5083           else:
5084             val = "-"
5085         elif field == "vcpus":
5086           val = i_be[constants.BE_VCPUS]
5087         elif field == "disk_template":
5088           val = instance.disk_template
5089         elif field == "ip":
5090           if instance.nics:
5091             val = instance.nics[0].ip
5092           else:
5093             val = None
5094         elif field == "nic_mode":
5095           if instance.nics:
5096             val = i_nicp[0][constants.NIC_MODE]
5097           else:
5098             val = None
5099         elif field == "nic_link":
5100           if instance.nics:
5101             val = i_nicp[0][constants.NIC_LINK]
5102           else:
5103             val = None
5104         elif field == "bridge":
5105           if (instance.nics and
5106               i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5107             val = i_nicp[0][constants.NIC_LINK]
5108           else:
5109             val = None
5110         elif field == "mac":
5111           if instance.nics:
5112             val = instance.nics[0].mac
5113           else:
5114             val = None
5115         elif field == "sda_size" or field == "sdb_size":
5116           idx = ord(field[2]) - ord('a')
5117           try:
5118             val = instance.FindDisk(idx).size
5119           except errors.OpPrereqError:
5120             val = None
5121         elif field == "disk_usage": # total disk usage per node
5122           disk_sizes = [{'size': disk.size} for disk in instance.disks]
5123           val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5124         elif field == "tags":
5125           val = list(instance.GetTags())
5126         elif field == "hvparams":
5127           val = i_hv
5128         elif (field.startswith(HVPREFIX) and
5129               field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5130               field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5131           val = i_hv.get(field[len(HVPREFIX):], None)
5132         elif field == "beparams":
5133           val = i_be
5134         elif (field.startswith(BEPREFIX) and
5135               field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5136           val = i_be.get(field[len(BEPREFIX):], None)
5137         elif st_match and st_match.groups():
5138           # matches a variable list
5139           st_groups = st_match.groups()
5140           if st_groups and st_groups[0] == "disk":
5141             if st_groups[1] == "count":
5142               val = len(instance.disks)
5143             elif st_groups[1] == "sizes":
5144               val = [disk.size for disk in instance.disks]
5145             elif st_groups[1] == "size":
5146               try:
5147                 val = instance.FindDisk(st_groups[2]).size
5148               except errors.OpPrereqError:
5149                 val = None
5150             else:
5151               assert False, "Unhandled disk parameter"
5152           elif st_groups[0] == "nic":
5153             if st_groups[1] == "count":
5154               val = len(instance.nics)
5155             elif st_groups[1] == "macs":
5156               val = [nic.mac for nic in instance.nics]
5157             elif st_groups[1] == "ips":
5158               val = [nic.ip for nic in instance.nics]
5159             elif st_groups[1] == "modes":
5160               val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5161             elif st_groups[1] == "links":
5162               val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5163             elif st_groups[1] == "bridges":
5164               val = []
5165               for nicp in i_nicp:
5166                 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5167                   val.append(nicp[constants.NIC_LINK])
5168                 else:
5169                   val.append(None)
5170             else:
5171               # index-based item
5172               nic_idx = int(st_groups[2])
5173               if nic_idx >= len(instance.nics):
5174                 val = None
5175               else:
5176                 if st_groups[1] == "mac":
5177                   val = instance.nics[nic_idx].mac
5178                 elif st_groups[1] == "ip":
5179                   val = instance.nics[nic_idx].ip
5180                 elif st_groups[1] == "mode":
5181                   val = i_nicp[nic_idx][constants.NIC_MODE]
5182                 elif st_groups[1] == "link":
5183                   val = i_nicp[nic_idx][constants.NIC_LINK]
5184                 elif st_groups[1] == "bridge":
5185                   nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5186                   if nic_mode == constants.NIC_MODE_BRIDGED:
5187                     val = i_nicp[nic_idx][constants.NIC_LINK]
5188                   else:
5189                     val = None
5190                 else:
5191                   assert False, "Unhandled NIC parameter"
5192           else:
5193             assert False, ("Declared but unhandled variable parameter '%s'" %
5194                            field)
5195         else:
5196           assert False, "Declared but unhandled parameter '%s'" % field
5197         iout.append(val)
5198       output.append(iout)
5199
5200     return output
5201
5202
5203 class LUFailoverInstance(LogicalUnit):
5204   """Failover an instance.
5205
5206   """
5207   HPATH = "instance-failover"
5208   HTYPE = constants.HTYPE_INSTANCE
5209   _OP_REQP = [
5210     ("instance_name", _TNonEmptyString),
5211     ("ignore_consistency", _TBool),
5212     ]
5213   _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5214   REQ_BGL = False
5215
5216   def ExpandNames(self):
5217     self._ExpandAndLockInstance()
5218     self.needed_locks[locking.LEVEL_NODE] = []
5219     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5220
5221   def DeclareLocks(self, level):
5222     if level == locking.LEVEL_NODE:
5223       self._LockInstancesNodes()
5224
5225   def BuildHooksEnv(self):
5226     """Build hooks env.
5227
5228     This runs on master, primary and secondary nodes of the instance.
5229
5230     """
5231     instance = self.instance
5232     source_node = instance.primary_node
5233     target_node = instance.secondary_nodes[0]
5234     env = {
5235       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5236       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5237       "OLD_PRIMARY": source_node,
5238       "OLD_SECONDARY": target_node,
5239       "NEW_PRIMARY": target_node,
5240       "NEW_SECONDARY": source_node,
5241       }
5242     env.update(_BuildInstanceHookEnvByObject(self, instance))
5243     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5244     nl_post = list(nl)
5245     nl_post.append(source_node)
5246     return env, nl, nl_post
5247
5248   def CheckPrereq(self):
5249     """Check prerequisites.
5250
5251     This checks that the instance is in the cluster.
5252
5253     """
5254     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5255     assert self.instance is not None, \
5256       "Cannot retrieve locked instance %s" % self.op.instance_name
5257
5258     bep = self.cfg.GetClusterInfo().FillBE(instance)
5259     if instance.disk_template not in constants.DTS_NET_MIRROR:
5260       raise errors.OpPrereqError("Instance's disk layout is not"
5261                                  " network mirrored, cannot failover.",
5262                                  errors.ECODE_STATE)
5263
5264     secondary_nodes = instance.secondary_nodes
5265     if not secondary_nodes:
5266       raise errors.ProgrammerError("no secondary node but using "
5267                                    "a mirrored disk template")
5268
5269     target_node = secondary_nodes[0]
5270     _CheckNodeOnline(self, target_node)
5271     _CheckNodeNotDrained(self, target_node)
5272     if instance.admin_up:
5273       # check memory requirements on the secondary node
5274       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5275                            instance.name, bep[constants.BE_MEMORY],
5276                            instance.hypervisor)
5277     else:
5278       self.LogInfo("Not checking memory on the secondary node as"
5279                    " instance will not be started")
5280
5281     # check bridge existance
5282     _CheckInstanceBridgesExist(self, instance, node=target_node)
5283
5284   def Exec(self, feedback_fn):
5285     """Failover an instance.
5286
5287     The failover is done by shutting it down on its present node and
5288     starting it on the secondary.
5289
5290     """
5291     instance = self.instance
5292
5293     source_node = instance.primary_node
5294     target_node = instance.secondary_nodes[0]
5295
5296     if instance.admin_up:
5297       feedback_fn("* checking disk consistency between source and target")
5298       for dev in instance.disks:
5299         # for drbd, these are drbd over lvm
5300         if not _CheckDiskConsistency(self, dev, target_node, False):
5301           if not self.op.ignore_consistency:
5302             raise errors.OpExecError("Disk %s is degraded on target node,"
5303                                      " aborting failover." % dev.iv_name)
5304     else:
5305       feedback_fn("* not checking disk consistency as instance is not running")
5306
5307     feedback_fn("* shutting down instance on source node")
5308     logging.info("Shutting down instance %s on node %s",
5309                  instance.name, source_node)
5310
5311     result = self.rpc.call_instance_shutdown(source_node, instance,
5312                                              self.op.shutdown_timeout)
5313     msg = result.fail_msg
5314     if msg:
5315       if self.op.ignore_consistency:
5316         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5317                              " Proceeding anyway. Please make sure node"
5318                              " %s is down. Error details: %s",
5319                              instance.name, source_node, source_node, msg)
5320       else:
5321         raise errors.OpExecError("Could not shutdown instance %s on"
5322                                  " node %s: %s" %
5323                                  (instance.name, source_node, msg))
5324
5325     feedback_fn("* deactivating the instance's disks on source node")
5326     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5327       raise errors.OpExecError("Can't shut down the instance's disks.")
5328
5329     instance.primary_node = target_node
5330     # distribute new instance config to the other nodes
5331     self.cfg.Update(instance, feedback_fn)
5332
5333     # Only start the instance if it's marked as up
5334     if instance.admin_up:
5335       feedback_fn("* activating the instance's disks on target node")
5336       logging.info("Starting instance %s on node %s",
5337                    instance.name, target_node)
5338
5339       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5340                                            ignore_secondaries=True)
5341       if not disks_ok:
5342         _ShutdownInstanceDisks(self, instance)
5343         raise errors.OpExecError("Can't activate the instance's disks")
5344
5345       feedback_fn("* starting the instance on the target node")
5346       result = self.rpc.call_instance_start(target_node, instance, None, None)
5347       msg = result.fail_msg
5348       if msg:
5349         _ShutdownInstanceDisks(self, instance)
5350         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5351                                  (instance.name, target_node, msg))
5352
5353
5354 class LUMigrateInstance(LogicalUnit):
5355   """Migrate an instance.
5356
5357   This is migration without shutting down, compared to the failover,
5358   which is done with shutdown.
5359
5360   """
5361   HPATH = "instance-migrate"
5362   HTYPE = constants.HTYPE_INSTANCE
5363   _OP_REQP = [
5364     ("instance_name", _TNonEmptyString),
5365     ("live", _TBool),
5366     ("cleanup", _TBool),
5367     ]
5368
5369   REQ_BGL = False
5370
5371   def ExpandNames(self):
5372     self._ExpandAndLockInstance()
5373
5374     self.needed_locks[locking.LEVEL_NODE] = []
5375     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5376
5377     self._migrater = TLMigrateInstance(self, self.op.instance_name,
5378                                        self.op.live, self.op.cleanup)
5379     self.tasklets = [self._migrater]
5380
5381   def DeclareLocks(self, level):
5382     if level == locking.LEVEL_NODE:
5383       self._LockInstancesNodes()
5384
5385   def BuildHooksEnv(self):
5386     """Build hooks env.
5387
5388     This runs on master, primary and secondary nodes of the instance.
5389
5390     """
5391     instance = self._migrater.instance
5392     source_node = instance.primary_node
5393     target_node = instance.secondary_nodes[0]
5394     env = _BuildInstanceHookEnvByObject(self, instance)
5395     env["MIGRATE_LIVE"] = self.op.live
5396     env["MIGRATE_CLEANUP"] = self.op.cleanup
5397     env.update({
5398         "OLD_PRIMARY": source_node,
5399         "OLD_SECONDARY": target_node,
5400         "NEW_PRIMARY": target_node,
5401         "NEW_SECONDARY": source_node,
5402         })
5403     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5404     nl_post = list(nl)
5405     nl_post.append(source_node)
5406     return env, nl, nl_post
5407
5408
5409 class LUMoveInstance(LogicalUnit):
5410   """Move an instance by data-copying.
5411
5412   """
5413   HPATH = "instance-move"
5414   HTYPE = constants.HTYPE_INSTANCE
5415   _OP_REQP = [
5416     ("instance_name", _TNonEmptyString),
5417     ("target_node", _TNonEmptyString),
5418     ]
5419   _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5420   REQ_BGL = False
5421
5422   def ExpandNames(self):
5423     self._ExpandAndLockInstance()
5424     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5425     self.op.target_node = target_node
5426     self.needed_locks[locking.LEVEL_NODE] = [target_node]
5427     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5428
5429   def DeclareLocks(self, level):
5430     if level == locking.LEVEL_NODE:
5431       self._LockInstancesNodes(primary_only=True)
5432
5433   def BuildHooksEnv(self):
5434     """Build hooks env.
5435
5436     This runs on master, primary and secondary nodes of the instance.
5437
5438     """
5439     env = {
5440       "TARGET_NODE": self.op.target_node,
5441       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5442       }
5443     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5444     nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5445                                        self.op.target_node]
5446     return env, nl, nl
5447
5448   def CheckPrereq(self):
5449     """Check prerequisites.
5450
5451     This checks that the instance is in the cluster.
5452
5453     """
5454     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5455     assert self.instance is not None, \
5456       "Cannot retrieve locked instance %s" % self.op.instance_name
5457
5458     node = self.cfg.GetNodeInfo(self.op.target_node)
5459     assert node is not None, \
5460       "Cannot retrieve locked node %s" % self.op.target_node
5461
5462     self.target_node = target_node = node.name
5463
5464     if target_node == instance.primary_node:
5465       raise errors.OpPrereqError("Instance %s is already on the node %s" %
5466                                  (instance.name, target_node),
5467                                  errors.ECODE_STATE)
5468
5469     bep = self.cfg.GetClusterInfo().FillBE(instance)
5470
5471     for idx, dsk in enumerate(instance.disks):
5472       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5473         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5474                                    " cannot copy" % idx, errors.ECODE_STATE)
5475
5476     _CheckNodeOnline(self, target_node)
5477     _CheckNodeNotDrained(self, target_node)
5478
5479     if instance.admin_up:
5480       # check memory requirements on the secondary node
5481       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5482                            instance.name, bep[constants.BE_MEMORY],
5483                            instance.hypervisor)
5484     else:
5485       self.LogInfo("Not checking memory on the secondary node as"
5486                    " instance will not be started")
5487
5488     # check bridge existance
5489     _CheckInstanceBridgesExist(self, instance, node=target_node)
5490
5491   def Exec(self, feedback_fn):
5492     """Move an instance.
5493
5494     The move is done by shutting it down on its present node, copying
5495     the data over (slow) and starting it on the new node.
5496
5497     """
5498     instance = self.instance
5499
5500     source_node = instance.primary_node
5501     target_node = self.target_node
5502
5503     self.LogInfo("Shutting down instance %s on source node %s",
5504                  instance.name, source_node)
5505
5506     result = self.rpc.call_instance_shutdown(source_node, instance,
5507                                              self.op.shutdown_timeout)
5508     msg = result.fail_msg
5509     if msg:
5510       if self.op.ignore_consistency:
5511         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5512                              " Proceeding anyway. Please make sure node"
5513                              " %s is down. Error details: %s",
5514                              instance.name, source_node, source_node, msg)
5515       else:
5516         raise errors.OpExecError("Could not shutdown instance %s on"
5517                                  " node %s: %s" %
5518                                  (instance.name, source_node, msg))
5519
5520     # create the target disks
5521     try:
5522       _CreateDisks(self, instance, target_node=target_node)
5523     except errors.OpExecError:
5524       self.LogWarning("Device creation failed, reverting...")
5525       try:
5526         _RemoveDisks(self, instance, target_node=target_node)
5527       finally:
5528         self.cfg.ReleaseDRBDMinors(instance.name)
5529         raise
5530
5531     cluster_name = self.cfg.GetClusterInfo().cluster_name
5532
5533     errs = []
5534     # activate, get path, copy the data over
5535     for idx, disk in enumerate(instance.disks):
5536       self.LogInfo("Copying data for disk %d", idx)
5537       result = self.rpc.call_blockdev_assemble(target_node, disk,
5538                                                instance.name, True)
5539       if result.fail_msg:
5540         self.LogWarning("Can't assemble newly created disk %d: %s",
5541                         idx, result.fail_msg)
5542         errs.append(result.fail_msg)
5543         break
5544       dev_path = result.payload
5545       result = self.rpc.call_blockdev_export(source_node, disk,
5546                                              target_node, dev_path,
5547                                              cluster_name)
5548       if result.fail_msg:
5549         self.LogWarning("Can't copy data over for disk %d: %s",
5550                         idx, result.fail_msg)
5551         errs.append(result.fail_msg)
5552         break
5553
5554     if errs:
5555       self.LogWarning("Some disks failed to copy, aborting")
5556       try:
5557         _RemoveDisks(self, instance, target_node=target_node)
5558       finally:
5559         self.cfg.ReleaseDRBDMinors(instance.name)
5560         raise errors.OpExecError("Errors during disk copy: %s" %
5561                                  (",".join(errs),))
5562
5563     instance.primary_node = target_node
5564     self.cfg.Update(instance, feedback_fn)
5565
5566     self.LogInfo("Removing the disks on the original node")
5567     _RemoveDisks(self, instance, target_node=source_node)
5568
5569     # Only start the instance if it's marked as up
5570     if instance.admin_up:
5571       self.LogInfo("Starting instance %s on node %s",
5572                    instance.name, target_node)
5573
5574       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5575                                            ignore_secondaries=True)
5576       if not disks_ok:
5577         _ShutdownInstanceDisks(self, instance)
5578         raise errors.OpExecError("Can't activate the instance's disks")
5579
5580       result = self.rpc.call_instance_start(target_node, instance, None, None)
5581       msg = result.fail_msg
5582       if msg:
5583         _ShutdownInstanceDisks(self, instance)
5584         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5585                                  (instance.name, target_node, msg))
5586
5587
5588 class LUMigrateNode(LogicalUnit):
5589   """Migrate all instances from a node.
5590
5591   """
5592   HPATH = "node-migrate"
5593   HTYPE = constants.HTYPE_NODE
5594   _OP_REQP = [
5595     ("node_name", _TNonEmptyString),
5596     ("live", _TBool),
5597     ]
5598   REQ_BGL = False
5599
5600   def ExpandNames(self):
5601     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5602
5603     self.needed_locks = {
5604       locking.LEVEL_NODE: [self.op.node_name],
5605       }
5606
5607     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5608
5609     # Create tasklets for migrating instances for all instances on this node
5610     names = []
5611     tasklets = []
5612
5613     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5614       logging.debug("Migrating instance %s", inst.name)
5615       names.append(inst.name)
5616
5617       tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5618
5619     self.tasklets = tasklets
5620
5621     # Declare instance locks
5622     self.needed_locks[locking.LEVEL_INSTANCE] = names
5623
5624   def DeclareLocks(self, level):
5625     if level == locking.LEVEL_NODE:
5626       self._LockInstancesNodes()
5627
5628   def BuildHooksEnv(self):
5629     """Build hooks env.
5630
5631     This runs on the master, the primary and all the secondaries.
5632
5633     """
5634     env = {
5635       "NODE_NAME": self.op.node_name,
5636       }
5637
5638     nl = [self.cfg.GetMasterNode()]
5639
5640     return (env, nl, nl)
5641
5642
5643 class TLMigrateInstance(Tasklet):
5644   def __init__(self, lu, instance_name, live, cleanup):
5645     """Initializes this class.
5646
5647     """
5648     Tasklet.__init__(self, lu)
5649
5650     # Parameters
5651     self.instance_name = instance_name
5652     self.live = live
5653     self.cleanup = cleanup
5654
5655   def CheckPrereq(self):
5656     """Check prerequisites.
5657
5658     This checks that the instance is in the cluster.
5659
5660     """
5661     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5662     instance = self.cfg.GetInstanceInfo(instance_name)
5663     assert instance is not None
5664
5665     if instance.disk_template != constants.DT_DRBD8:
5666       raise errors.OpPrereqError("Instance's disk layout is not"
5667                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
5668
5669     secondary_nodes = instance.secondary_nodes
5670     if not secondary_nodes:
5671       raise errors.ConfigurationError("No secondary node but using"
5672                                       " drbd8 disk template")
5673
5674     i_be = self.cfg.GetClusterInfo().FillBE(instance)
5675
5676     target_node = secondary_nodes[0]
5677     # check memory requirements on the secondary node
5678     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5679                          instance.name, i_be[constants.BE_MEMORY],
5680                          instance.hypervisor)
5681
5682     # check bridge existance
5683     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5684
5685     if not self.cleanup:
5686       _CheckNodeNotDrained(self.lu, target_node)
5687       result = self.rpc.call_instance_migratable(instance.primary_node,
5688                                                  instance)
5689       result.Raise("Can't migrate, please use failover",
5690                    prereq=True, ecode=errors.ECODE_STATE)
5691
5692     self.instance = instance
5693
5694   def _WaitUntilSync(self):
5695     """Poll with custom rpc for disk sync.
5696
5697     This uses our own step-based rpc call.
5698
5699     """
5700     self.feedback_fn("* wait until resync is done")
5701     all_done = False
5702     while not all_done:
5703       all_done = True
5704       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5705                                             self.nodes_ip,
5706                                             self.instance.disks)
5707       min_percent = 100
5708       for node, nres in result.items():
5709         nres.Raise("Cannot resync disks on node %s" % node)
5710         node_done, node_percent = nres.payload
5711         all_done = all_done and node_done
5712         if node_percent is not None:
5713           min_percent = min(min_percent, node_percent)
5714       if not all_done:
5715         if min_percent < 100:
5716           self.feedback_fn("   - progress: %.1f%%" % min_percent)
5717         time.sleep(2)
5718
5719   def _EnsureSecondary(self, node):
5720     """Demote a node to secondary.
5721
5722     """
5723     self.feedback_fn("* switching node %s to secondary mode" % node)
5724
5725     for dev in self.instance.disks:
5726       self.cfg.SetDiskID(dev, node)
5727
5728     result = self.rpc.call_blockdev_close(node, self.instance.name,
5729                                           self.instance.disks)
5730     result.Raise("Cannot change disk to secondary on node %s" % node)
5731
5732   def _GoStandalone(self):
5733     """Disconnect from the network.
5734
5735     """
5736     self.feedback_fn("* changing into standalone mode")
5737     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5738                                                self.instance.disks)
5739     for node, nres in result.items():
5740       nres.Raise("Cannot disconnect disks node %s" % node)
5741
5742   def _GoReconnect(self, multimaster):
5743     """Reconnect to the network.
5744
5745     """
5746     if multimaster:
5747       msg = "dual-master"
5748     else:
5749       msg = "single-master"
5750     self.feedback_fn("* changing disks into %s mode" % msg)
5751     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5752                                            self.instance.disks,
5753                                            self.instance.name, multimaster)
5754     for node, nres in result.items():
5755       nres.Raise("Cannot change disks config on node %s" % node)
5756
5757   def _ExecCleanup(self):
5758     """Try to cleanup after a failed migration.
5759
5760     The cleanup is done by:
5761       - check that the instance is running only on one node
5762         (and update the config if needed)
5763       - change disks on its secondary node to secondary
5764       - wait until disks are fully synchronized
5765       - disconnect from the network
5766       - change disks into single-master mode
5767       - wait again until disks are fully synchronized
5768
5769     """
5770     instance = self.instance
5771     target_node = self.target_node
5772     source_node = self.source_node
5773
5774     # check running on only one node
5775     self.feedback_fn("* checking where the instance actually runs"
5776                      " (if this hangs, the hypervisor might be in"
5777                      " a bad state)")
5778     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5779     for node, result in ins_l.items():
5780       result.Raise("Can't contact node %s" % node)
5781
5782     runningon_source = instance.name in ins_l[source_node].payload
5783     runningon_target = instance.name in ins_l[target_node].payload
5784
5785     if runningon_source and runningon_target:
5786       raise errors.OpExecError("Instance seems to be running on two nodes,"
5787                                " or the hypervisor is confused. You will have"
5788                                " to ensure manually that it runs only on one"
5789                                " and restart this operation.")
5790
5791     if not (runningon_source or runningon_target):
5792       raise errors.OpExecError("Instance does not seem to be running at all."
5793                                " In this case, it's safer to repair by"
5794                                " running 'gnt-instance stop' to ensure disk"
5795                                " shutdown, and then restarting it.")
5796
5797     if runningon_target:
5798       # the migration has actually succeeded, we need to update the config
5799       self.feedback_fn("* instance running on secondary node (%s),"
5800                        " updating config" % target_node)
5801       instance.primary_node = target_node
5802       self.cfg.Update(instance, self.feedback_fn)
5803       demoted_node = source_node
5804     else:
5805       self.feedback_fn("* instance confirmed to be running on its"
5806                        " primary node (%s)" % source_node)
5807       demoted_node = target_node
5808
5809     self._EnsureSecondary(demoted_node)
5810     try:
5811       self._WaitUntilSync()
5812     except errors.OpExecError:
5813       # we ignore here errors, since if the device is standalone, it
5814       # won't be able to sync
5815       pass
5816     self._GoStandalone()
5817     self._GoReconnect(False)
5818     self._WaitUntilSync()
5819
5820     self.feedback_fn("* done")
5821
5822   def _RevertDiskStatus(self):
5823     """Try to revert the disk status after a failed migration.
5824
5825     """
5826     target_node = self.target_node
5827     try:
5828       self._EnsureSecondary(target_node)
5829       self._GoStandalone()
5830       self._GoReconnect(False)
5831       self._WaitUntilSync()
5832     except errors.OpExecError, err:
5833       self.lu.LogWarning("Migration failed and I can't reconnect the"
5834                          " drives: error '%s'\n"
5835                          "Please look and recover the instance status" %
5836                          str(err))
5837
5838   def _AbortMigration(self):
5839     """Call the hypervisor code to abort a started migration.
5840
5841     """
5842     instance = self.instance
5843     target_node = self.target_node
5844     migration_info = self.migration_info
5845
5846     abort_result = self.rpc.call_finalize_migration(target_node,
5847                                                     instance,
5848                                                     migration_info,
5849                                                     False)
5850     abort_msg = abort_result.fail_msg
5851     if abort_msg:
5852       logging.error("Aborting migration failed on target node %s: %s",
5853                     target_node, abort_msg)
5854       # Don't raise an exception here, as we stil have to try to revert the
5855       # disk status, even if this step failed.
5856
5857   def _ExecMigration(self):
5858     """Migrate an instance.
5859
5860     The migrate is done by:
5861       - change the disks into dual-master mode
5862       - wait until disks are fully synchronized again
5863       - migrate the instance
5864       - change disks on the new secondary node (the old primary) to secondary
5865       - wait until disks are fully synchronized
5866       - change disks into single-master mode
5867
5868     """
5869     instance = self.instance
5870     target_node = self.target_node
5871     source_node = self.source_node
5872
5873     self.feedback_fn("* checking disk consistency between source and target")
5874     for dev in instance.disks:
5875       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5876         raise errors.OpExecError("Disk %s is degraded or not fully"
5877                                  " synchronized on target node,"
5878                                  " aborting migrate." % dev.iv_name)
5879
5880     # First get the migration information from the remote node
5881     result = self.rpc.call_migration_info(source_node, instance)
5882     msg = result.fail_msg
5883     if msg:
5884       log_err = ("Failed fetching source migration information from %s: %s" %
5885                  (source_node, msg))
5886       logging.error(log_err)
5887       raise errors.OpExecError(log_err)
5888
5889     self.migration_info = migration_info = result.payload
5890
5891     # Then switch the disks to master/master mode
5892     self._EnsureSecondary(target_node)
5893     self._GoStandalone()
5894     self._GoReconnect(True)
5895     self._WaitUntilSync()
5896
5897     self.feedback_fn("* preparing %s to accept the instance" % target_node)
5898     result = self.rpc.call_accept_instance(target_node,
5899                                            instance,
5900                                            migration_info,
5901                                            self.nodes_ip[target_node])
5902
5903     msg = result.fail_msg
5904     if msg:
5905       logging.error("Instance pre-migration failed, trying to revert"
5906                     " disk status: %s", msg)
5907       self.feedback_fn("Pre-migration failed, aborting")
5908       self._AbortMigration()
5909       self._RevertDiskStatus()
5910       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5911                                (instance.name, msg))
5912
5913     self.feedback_fn("* migrating instance to %s" % target_node)
5914     time.sleep(10)
5915     result = self.rpc.call_instance_migrate(source_node, instance,
5916                                             self.nodes_ip[target_node],
5917                                             self.live)
5918     msg = result.fail_msg
5919     if msg:
5920       logging.error("Instance migration failed, trying to revert"
5921                     " disk status: %s", msg)
5922       self.feedback_fn("Migration failed, aborting")
5923       self._AbortMigration()
5924       self._RevertDiskStatus()
5925       raise errors.OpExecError("Could not migrate instance %s: %s" %
5926                                (instance.name, msg))
5927     time.sleep(10)
5928
5929     instance.primary_node = target_node
5930     # distribute new instance config to the other nodes
5931     self.cfg.Update(instance, self.feedback_fn)
5932
5933     result = self.rpc.call_finalize_migration(target_node,
5934                                               instance,
5935                                               migration_info,
5936                                               True)
5937     msg = result.fail_msg
5938     if msg:
5939       logging.error("Instance migration succeeded, but finalization failed:"
5940                     " %s", msg)
5941       raise errors.OpExecError("Could not finalize instance migration: %s" %
5942                                msg)
5943
5944     self._EnsureSecondary(source_node)
5945     self._WaitUntilSync()
5946     self._GoStandalone()
5947     self._GoReconnect(False)
5948     self._WaitUntilSync()
5949
5950     self.feedback_fn("* done")
5951
5952   def Exec(self, feedback_fn):
5953     """Perform the migration.
5954
5955     """
5956     feedback_fn("Migrating instance %s" % self.instance.name)
5957
5958     self.feedback_fn = feedback_fn
5959
5960     self.source_node = self.instance.primary_node
5961     self.target_node = self.instance.secondary_nodes[0]
5962     self.all_nodes = [self.source_node, self.target_node]
5963     self.nodes_ip = {
5964       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5965       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5966       }
5967
5968     if self.cleanup:
5969       return self._ExecCleanup()
5970     else:
5971       return self._ExecMigration()
5972
5973
5974 def _CreateBlockDev(lu, node, instance, device, force_create,
5975                     info, force_open):
5976   """Create a tree of block devices on a given node.
5977
5978   If this device type has to be created on secondaries, create it and
5979   all its children.
5980
5981   If not, just recurse to children keeping the same 'force' value.
5982
5983   @param lu: the lu on whose behalf we execute
5984   @param node: the node on which to create the device
5985   @type instance: L{objects.Instance}
5986   @param instance: the instance which owns the device
5987   @type device: L{objects.Disk}
5988   @param device: the device to create
5989   @type force_create: boolean
5990   @param force_create: whether to force creation of this device; this
5991       will be change to True whenever we find a device which has
5992       CreateOnSecondary() attribute
5993   @param info: the extra 'metadata' we should attach to the device
5994       (this will be represented as a LVM tag)
5995   @type force_open: boolean
5996   @param force_open: this parameter will be passes to the
5997       L{backend.BlockdevCreate} function where it specifies
5998       whether we run on primary or not, and it affects both
5999       the child assembly and the device own Open() execution
6000
6001   """
6002   if device.CreateOnSecondary():
6003     force_create = True
6004
6005   if device.children:
6006     for child in device.children:
6007       _CreateBlockDev(lu, node, instance, child, force_create,
6008                       info, force_open)
6009
6010   if not force_create:
6011     return
6012
6013   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6014
6015
6016 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6017   """Create a single block device on a given node.
6018
6019   This will not recurse over children of the device, so they must be
6020   created in advance.
6021
6022   @param lu: the lu on whose behalf we execute
6023   @param node: the node on which to create the device
6024   @type instance: L{objects.Instance}
6025   @param instance: the instance which owns the device
6026   @type device: L{objects.Disk}
6027   @param device: the device to create
6028   @param info: the extra 'metadata' we should attach to the device
6029       (this will be represented as a LVM tag)
6030   @type force_open: boolean
6031   @param force_open: this parameter will be passes to the
6032       L{backend.BlockdevCreate} function where it specifies
6033       whether we run on primary or not, and it affects both
6034       the child assembly and the device own Open() execution
6035
6036   """
6037   lu.cfg.SetDiskID(device, node)
6038   result = lu.rpc.call_blockdev_create(node, device, device.size,
6039                                        instance.name, force_open, info)
6040   result.Raise("Can't create block device %s on"
6041                " node %s for instance %s" % (device, node, instance.name))
6042   if device.physical_id is None:
6043     device.physical_id = result.payload
6044
6045
6046 def _GenerateUniqueNames(lu, exts):
6047   """Generate a suitable LV name.
6048
6049   This will generate a logical volume name for the given instance.
6050
6051   """
6052   results = []
6053   for val in exts:
6054     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6055     results.append("%s%s" % (new_id, val))
6056   return results
6057
6058
6059 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6060                          p_minor, s_minor):
6061   """Generate a drbd8 device complete with its children.
6062
6063   """
6064   port = lu.cfg.AllocatePort()
6065   vgname = lu.cfg.GetVGName()
6066   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6067   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6068                           logical_id=(vgname, names[0]))
6069   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6070                           logical_id=(vgname, names[1]))
6071   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6072                           logical_id=(primary, secondary, port,
6073                                       p_minor, s_minor,
6074                                       shared_secret),
6075                           children=[dev_data, dev_meta],
6076                           iv_name=iv_name)
6077   return drbd_dev
6078
6079
6080 def _GenerateDiskTemplate(lu, template_name,
6081                           instance_name, primary_node,
6082                           secondary_nodes, disk_info,
6083                           file_storage_dir, file_driver,
6084                           base_index):
6085   """Generate the entire disk layout for a given template type.
6086
6087   """
6088   #TODO: compute space requirements
6089
6090   vgname = lu.cfg.GetVGName()
6091   disk_count = len(disk_info)
6092   disks = []
6093   if template_name == constants.DT_DISKLESS:
6094     pass
6095   elif template_name == constants.DT_PLAIN:
6096     if len(secondary_nodes) != 0:
6097       raise errors.ProgrammerError("Wrong template configuration")
6098
6099     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6100                                       for i in range(disk_count)])
6101     for idx, disk in enumerate(disk_info):
6102       disk_index = idx + base_index
6103       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6104                               logical_id=(vgname, names[idx]),
6105                               iv_name="disk/%d" % disk_index,
6106                               mode=disk["mode"])
6107       disks.append(disk_dev)
6108   elif template_name == constants.DT_DRBD8:
6109     if len(secondary_nodes) != 1:
6110       raise errors.ProgrammerError("Wrong template configuration")
6111     remote_node = secondary_nodes[0]
6112     minors = lu.cfg.AllocateDRBDMinor(
6113       [primary_node, remote_node] * len(disk_info), instance_name)
6114
6115     names = []
6116     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6117                                                for i in range(disk_count)]):
6118       names.append(lv_prefix + "_data")
6119       names.append(lv_prefix + "_meta")
6120     for idx, disk in enumerate(disk_info):
6121       disk_index = idx + base_index
6122       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6123                                       disk["size"], names[idx*2:idx*2+2],
6124                                       "disk/%d" % disk_index,
6125                                       minors[idx*2], minors[idx*2+1])
6126       disk_dev.mode = disk["mode"]
6127       disks.append(disk_dev)
6128   elif template_name == constants.DT_FILE:
6129     if len(secondary_nodes) != 0:
6130       raise errors.ProgrammerError("Wrong template configuration")
6131
6132     _RequireFileStorage()
6133
6134     for idx, disk in enumerate(disk_info):
6135       disk_index = idx + base_index
6136       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6137                               iv_name="disk/%d" % disk_index,
6138                               logical_id=(file_driver,
6139                                           "%s/disk%d" % (file_storage_dir,
6140                                                          disk_index)),
6141                               mode=disk["mode"])
6142       disks.append(disk_dev)
6143   else:
6144     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6145   return disks
6146
6147
6148 def _GetInstanceInfoText(instance):
6149   """Compute that text that should be added to the disk's metadata.
6150
6151   """
6152   return "originstname+%s" % instance.name
6153
6154
6155 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6156   """Create all disks for an instance.
6157
6158   This abstracts away some work from AddInstance.
6159
6160   @type lu: L{LogicalUnit}
6161   @param lu: the logical unit on whose behalf we execute
6162   @type instance: L{objects.Instance}
6163   @param instance: the instance whose disks we should create
6164   @type to_skip: list
6165   @param to_skip: list of indices to skip
6166   @type target_node: string
6167   @param target_node: if passed, overrides the target node for creation
6168   @rtype: boolean
6169   @return: the success of the creation
6170
6171   """
6172   info = _GetInstanceInfoText(instance)
6173   if target_node is None:
6174     pnode = instance.primary_node
6175     all_nodes = instance.all_nodes
6176   else:
6177     pnode = target_node
6178     all_nodes = [pnode]
6179
6180   if instance.disk_template == constants.DT_FILE:
6181     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6182     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6183
6184     result.Raise("Failed to create directory '%s' on"
6185                  " node %s" % (file_storage_dir, pnode))
6186
6187   # Note: this needs to be kept in sync with adding of disks in
6188   # LUSetInstanceParams
6189   for idx, device in enumerate(instance.disks):
6190     if to_skip and idx in to_skip:
6191       continue
6192     logging.info("Creating volume %s for instance %s",
6193                  device.iv_name, instance.name)
6194     #HARDCODE
6195     for node in all_nodes:
6196       f_create = node == pnode
6197       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6198
6199
6200 def _RemoveDisks(lu, instance, target_node=None):
6201   """Remove all disks for an instance.
6202
6203   This abstracts away some work from `AddInstance()` and
6204   `RemoveInstance()`. Note that in case some of the devices couldn't
6205   be removed, the removal will continue with the other ones (compare
6206   with `_CreateDisks()`).
6207
6208   @type lu: L{LogicalUnit}
6209   @param lu: the logical unit on whose behalf we execute
6210   @type instance: L{objects.Instance}
6211   @param instance: the instance whose disks we should remove
6212   @type target_node: string
6213   @param target_node: used to override the node on which to remove the disks
6214   @rtype: boolean
6215   @return: the success of the removal
6216
6217   """
6218   logging.info("Removing block devices for instance %s", instance.name)
6219
6220   all_result = True
6221   for device in instance.disks:
6222     if target_node:
6223       edata = [(target_node, device)]
6224     else:
6225       edata = device.ComputeNodeTree(instance.primary_node)
6226     for node, disk in edata:
6227       lu.cfg.SetDiskID(disk, node)
6228       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6229       if msg:
6230         lu.LogWarning("Could not remove block device %s on node %s,"
6231                       " continuing anyway: %s", device.iv_name, node, msg)
6232         all_result = False
6233
6234   if instance.disk_template == constants.DT_FILE:
6235     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6236     if target_node:
6237       tgt = target_node
6238     else:
6239       tgt = instance.primary_node
6240     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6241     if result.fail_msg:
6242       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6243                     file_storage_dir, instance.primary_node, result.fail_msg)
6244       all_result = False
6245
6246   return all_result
6247
6248
6249 def _ComputeDiskSize(disk_template, disks):
6250   """Compute disk size requirements in the volume group
6251
6252   """
6253   # Required free disk space as a function of disk and swap space
6254   req_size_dict = {
6255     constants.DT_DISKLESS: None,
6256     constants.DT_PLAIN: sum(d["size"] for d in disks),
6257     # 128 MB are added for drbd metadata for each disk
6258     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6259     constants.DT_FILE: None,
6260   }
6261
6262   if disk_template not in req_size_dict:
6263     raise errors.ProgrammerError("Disk template '%s' size requirement"
6264                                  " is unknown" %  disk_template)
6265
6266   return req_size_dict[disk_template]
6267
6268
6269 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6270   """Hypervisor parameter validation.
6271
6272   This function abstract the hypervisor parameter validation to be
6273   used in both instance create and instance modify.
6274
6275   @type lu: L{LogicalUnit}
6276   @param lu: the logical unit for which we check
6277   @type nodenames: list
6278   @param nodenames: the list of nodes on which we should check
6279   @type hvname: string
6280   @param hvname: the name of the hypervisor we should use
6281   @type hvparams: dict
6282   @param hvparams: the parameters which we need to check
6283   @raise errors.OpPrereqError: if the parameters are not valid
6284
6285   """
6286   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6287                                                   hvname,
6288                                                   hvparams)
6289   for node in nodenames:
6290     info = hvinfo[node]
6291     if info.offline:
6292       continue
6293     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6294
6295
6296 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6297   """OS parameters validation.
6298
6299   @type lu: L{LogicalUnit}
6300   @param lu: the logical unit for which we check
6301   @type required: boolean
6302   @param required: whether the validation should fail if the OS is not
6303       found
6304   @type nodenames: list
6305   @param nodenames: the list of nodes on which we should check
6306   @type osname: string
6307   @param osname: the name of the hypervisor we should use
6308   @type osparams: dict
6309   @param osparams: the parameters which we need to check
6310   @raise errors.OpPrereqError: if the parameters are not valid
6311
6312   """
6313   result = lu.rpc.call_os_validate(required, nodenames, osname,
6314                                    [constants.OS_VALIDATE_PARAMETERS],
6315                                    osparams)
6316   for node, nres in result.items():
6317     # we don't check for offline cases since this should be run only
6318     # against the master node and/or an instance's nodes
6319     nres.Raise("OS Parameters validation failed on node %s" % node)
6320     if not nres.payload:
6321       lu.LogInfo("OS %s not found on node %s, validation skipped",
6322                  osname, node)
6323
6324
6325 class LUCreateInstance(LogicalUnit):
6326   """Create an instance.
6327
6328   """
6329   HPATH = "instance-add"
6330   HTYPE = constants.HTYPE_INSTANCE
6331   _OP_REQP = [
6332     ("instance_name", _TNonEmptyString),
6333     ("mode", _TElemOf(constants.INSTANCE_CREATE_MODES)),
6334     ("start", _TBool),
6335     ("wait_for_sync", _TBool),
6336     ("ip_check", _TBool),
6337     ("disks", _TListOf(_TDict)),
6338     ("nics", _TListOf(_TDict)),
6339     ("hvparams", _TDict),
6340     ("beparams", _TDict),
6341     ("osparams", _TDict),
6342     ]
6343   _OP_DEFS = [
6344     ("name_check", True),
6345     ("no_install", False),
6346     ("os_type", None),
6347     ("force_variant", False),
6348     ("source_handshake", None),
6349     ("source_x509_ca", None),
6350     ("source_instance_name", None),
6351     ("src_node", None),
6352     ("src_path", None),
6353     ("pnode", None),
6354     ("snode", None),
6355     ("iallocator", None),
6356     ("hypervisor", None),
6357     ("disk_template", None),
6358     ("identify_defaults", None),
6359     ]
6360   REQ_BGL = False
6361
6362   def CheckArguments(self):
6363     """Check arguments.
6364
6365     """
6366     # do not require name_check to ease forward/backward compatibility
6367     # for tools
6368     if self.op.no_install and self.op.start:
6369       self.LogInfo("No-installation mode selected, disabling startup")
6370       self.op.start = False
6371     # validate/normalize the instance name
6372     self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6373     if self.op.ip_check and not self.op.name_check:
6374       # TODO: make the ip check more flexible and not depend on the name check
6375       raise errors.OpPrereqError("Cannot do ip checks without a name check",
6376                                  errors.ECODE_INVAL)
6377
6378     # check nics' parameter names
6379     for nic in self.op.nics:
6380       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6381
6382     # check disks. parameter names and consistent adopt/no-adopt strategy
6383     has_adopt = has_no_adopt = False
6384     for disk in self.op.disks:
6385       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6386       if "adopt" in disk:
6387         has_adopt = True
6388       else:
6389         has_no_adopt = True
6390     if has_adopt and has_no_adopt:
6391       raise errors.OpPrereqError("Either all disks are adopted or none is",
6392                                  errors.ECODE_INVAL)
6393     if has_adopt:
6394       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6395         raise errors.OpPrereqError("Disk adoption is not supported for the"
6396                                    " '%s' disk template" %
6397                                    self.op.disk_template,
6398                                    errors.ECODE_INVAL)
6399       if self.op.iallocator is not None:
6400         raise errors.OpPrereqError("Disk adoption not allowed with an"
6401                                    " iallocator script", errors.ECODE_INVAL)
6402       if self.op.mode == constants.INSTANCE_IMPORT:
6403         raise errors.OpPrereqError("Disk adoption not allowed for"
6404                                    " instance import", errors.ECODE_INVAL)
6405
6406     self.adopt_disks = has_adopt
6407
6408     # instance name verification
6409     if self.op.name_check:
6410       self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6411       self.op.instance_name = self.hostname1.name
6412       # used in CheckPrereq for ip ping check
6413       self.check_ip = self.hostname1.ip
6414     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6415       raise errors.OpPrereqError("Remote imports require names to be checked" %
6416                                  errors.ECODE_INVAL)
6417     else:
6418       self.check_ip = None
6419
6420     # file storage checks
6421     if (self.op.file_driver and
6422         not self.op.file_driver in constants.FILE_DRIVER):
6423       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6424                                  self.op.file_driver, errors.ECODE_INVAL)
6425
6426     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6427       raise errors.OpPrereqError("File storage directory path not absolute",
6428                                  errors.ECODE_INVAL)
6429
6430     ### Node/iallocator related checks
6431     if [self.op.iallocator, self.op.pnode].count(None) != 1:
6432       raise errors.OpPrereqError("One and only one of iallocator and primary"
6433                                  " node must be given",
6434                                  errors.ECODE_INVAL)
6435
6436     self._cds = _GetClusterDomainSecret()
6437
6438     if self.op.mode == constants.INSTANCE_IMPORT:
6439       # On import force_variant must be True, because if we forced it at
6440       # initial install, our only chance when importing it back is that it
6441       # works again!
6442       self.op.force_variant = True
6443
6444       if self.op.no_install:
6445         self.LogInfo("No-installation mode has no effect during import")
6446
6447     elif self.op.mode == constants.INSTANCE_CREATE:
6448       if self.op.os_type is None:
6449         raise errors.OpPrereqError("No guest OS specified",
6450                                    errors.ECODE_INVAL)
6451       if self.op.disk_template is None:
6452         raise errors.OpPrereqError("No disk template specified",
6453                                    errors.ECODE_INVAL)
6454
6455     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6456       # Check handshake to ensure both clusters have the same domain secret
6457       src_handshake = self.op.source_handshake
6458       if not src_handshake:
6459         raise errors.OpPrereqError("Missing source handshake",
6460                                    errors.ECODE_INVAL)
6461
6462       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6463                                                            src_handshake)
6464       if errmsg:
6465         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6466                                    errors.ECODE_INVAL)
6467
6468       # Load and check source CA
6469       self.source_x509_ca_pem = self.op.source_x509_ca
6470       if not self.source_x509_ca_pem:
6471         raise errors.OpPrereqError("Missing source X509 CA",
6472                                    errors.ECODE_INVAL)
6473
6474       try:
6475         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6476                                                     self._cds)
6477       except OpenSSL.crypto.Error, err:
6478         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6479                                    (err, ), errors.ECODE_INVAL)
6480
6481       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6482       if errcode is not None:
6483         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6484                                    errors.ECODE_INVAL)
6485
6486       self.source_x509_ca = cert
6487
6488       src_instance_name = self.op.source_instance_name
6489       if not src_instance_name:
6490         raise errors.OpPrereqError("Missing source instance name",
6491                                    errors.ECODE_INVAL)
6492
6493       self.source_instance_name = \
6494         utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6495
6496     else:
6497       raise errors.OpPrereqError("Invalid instance creation mode %r" %
6498                                  self.op.mode, errors.ECODE_INVAL)
6499
6500   def ExpandNames(self):
6501     """ExpandNames for CreateInstance.
6502
6503     Figure out the right locks for instance creation.
6504
6505     """
6506     self.needed_locks = {}
6507
6508     instance_name = self.op.instance_name
6509     # this is just a preventive check, but someone might still add this
6510     # instance in the meantime, and creation will fail at lock-add time
6511     if instance_name in self.cfg.GetInstanceList():
6512       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6513                                  instance_name, errors.ECODE_EXISTS)
6514
6515     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6516
6517     if self.op.iallocator:
6518       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6519     else:
6520       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6521       nodelist = [self.op.pnode]
6522       if self.op.snode is not None:
6523         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6524         nodelist.append(self.op.snode)
6525       self.needed_locks[locking.LEVEL_NODE] = nodelist
6526
6527     # in case of import lock the source node too
6528     if self.op.mode == constants.INSTANCE_IMPORT:
6529       src_node = self.op.src_node
6530       src_path = self.op.src_path
6531
6532       if src_path is None:
6533         self.op.src_path = src_path = self.op.instance_name
6534
6535       if src_node is None:
6536         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6537         self.op.src_node = None
6538         if os.path.isabs(src_path):
6539           raise errors.OpPrereqError("Importing an instance from an absolute"
6540                                      " path requires a source node option.",
6541                                      errors.ECODE_INVAL)
6542       else:
6543         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6544         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6545           self.needed_locks[locking.LEVEL_NODE].append(src_node)
6546         if not os.path.isabs(src_path):
6547           self.op.src_path = src_path = \
6548             utils.PathJoin(constants.EXPORT_DIR, src_path)
6549
6550   def _RunAllocator(self):
6551     """Run the allocator based on input opcode.
6552
6553     """
6554     nics = [n.ToDict() for n in self.nics]
6555     ial = IAllocator(self.cfg, self.rpc,
6556                      mode=constants.IALLOCATOR_MODE_ALLOC,
6557                      name=self.op.instance_name,
6558                      disk_template=self.op.disk_template,
6559                      tags=[],
6560                      os=self.op.os_type,
6561                      vcpus=self.be_full[constants.BE_VCPUS],
6562                      mem_size=self.be_full[constants.BE_MEMORY],
6563                      disks=self.disks,
6564                      nics=nics,
6565                      hypervisor=self.op.hypervisor,
6566                      )
6567
6568     ial.Run(self.op.iallocator)
6569
6570     if not ial.success:
6571       raise errors.OpPrereqError("Can't compute nodes using"
6572                                  " iallocator '%s': %s" %
6573                                  (self.op.iallocator, ial.info),
6574                                  errors.ECODE_NORES)
6575     if len(ial.result) != ial.required_nodes:
6576       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6577                                  " of nodes (%s), required %s" %
6578                                  (self.op.iallocator, len(ial.result),
6579                                   ial.required_nodes), errors.ECODE_FAULT)
6580     self.op.pnode = ial.result[0]
6581     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6582                  self.op.instance_name, self.op.iallocator,
6583                  utils.CommaJoin(ial.result))
6584     if ial.required_nodes == 2:
6585       self.op.snode = ial.result[1]
6586
6587   def BuildHooksEnv(self):
6588     """Build hooks env.
6589
6590     This runs on master, primary and secondary nodes of the instance.
6591
6592     """
6593     env = {
6594       "ADD_MODE": self.op.mode,
6595       }
6596     if self.op.mode == constants.INSTANCE_IMPORT:
6597       env["SRC_NODE"] = self.op.src_node
6598       env["SRC_PATH"] = self.op.src_path
6599       env["SRC_IMAGES"] = self.src_images
6600
6601     env.update(_BuildInstanceHookEnv(
6602       name=self.op.instance_name,
6603       primary_node=self.op.pnode,
6604       secondary_nodes=self.secondaries,
6605       status=self.op.start,
6606       os_type=self.op.os_type,
6607       memory=self.be_full[constants.BE_MEMORY],
6608       vcpus=self.be_full[constants.BE_VCPUS],
6609       nics=_NICListToTuple(self, self.nics),
6610       disk_template=self.op.disk_template,
6611       disks=[(d["size"], d["mode"]) for d in self.disks],
6612       bep=self.be_full,
6613       hvp=self.hv_full,
6614       hypervisor_name=self.op.hypervisor,
6615     ))
6616
6617     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6618           self.secondaries)
6619     return env, nl, nl
6620
6621   def _ReadExportInfo(self):
6622     """Reads the export information from disk.
6623
6624     It will override the opcode source node and path with the actual
6625     information, if these two were not specified before.
6626
6627     @return: the export information
6628
6629     """
6630     assert self.op.mode == constants.INSTANCE_IMPORT
6631
6632     src_node = self.op.src_node
6633     src_path = self.op.src_path
6634
6635     if src_node is None:
6636       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6637       exp_list = self.rpc.call_export_list(locked_nodes)
6638       found = False
6639       for node in exp_list:
6640         if exp_list[node].fail_msg:
6641           continue
6642         if src_path in exp_list[node].payload:
6643           found = True
6644           self.op.src_node = src_node = node
6645           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6646                                                        src_path)
6647           break
6648       if not found:
6649         raise errors.OpPrereqError("No export found for relative path %s" %
6650                                     src_path, errors.ECODE_INVAL)
6651
6652     _CheckNodeOnline(self, src_node)
6653     result = self.rpc.call_export_info(src_node, src_path)
6654     result.Raise("No export or invalid export found in dir %s" % src_path)
6655
6656     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6657     if not export_info.has_section(constants.INISECT_EXP):
6658       raise errors.ProgrammerError("Corrupted export config",
6659                                    errors.ECODE_ENVIRON)
6660
6661     ei_version = export_info.get(constants.INISECT_EXP, "version")
6662     if (int(ei_version) != constants.EXPORT_VERSION):
6663       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6664                                  (ei_version, constants.EXPORT_VERSION),
6665                                  errors.ECODE_ENVIRON)
6666     return export_info
6667
6668   def _ReadExportParams(self, einfo):
6669     """Use export parameters as defaults.
6670
6671     In case the opcode doesn't specify (as in override) some instance
6672     parameters, then try to use them from the export information, if
6673     that declares them.
6674
6675     """
6676     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6677
6678     if self.op.disk_template is None:
6679       if einfo.has_option(constants.INISECT_INS, "disk_template"):
6680         self.op.disk_template = einfo.get(constants.INISECT_INS,
6681                                           "disk_template")
6682       else:
6683         raise errors.OpPrereqError("No disk template specified and the export"
6684                                    " is missing the disk_template information",
6685                                    errors.ECODE_INVAL)
6686
6687     if not self.op.disks:
6688       if einfo.has_option(constants.INISECT_INS, "disk_count"):
6689         disks = []
6690         # TODO: import the disk iv_name too
6691         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6692           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6693           disks.append({"size": disk_sz})
6694         self.op.disks = disks
6695       else:
6696         raise errors.OpPrereqError("No disk info specified and the export"
6697                                    " is missing the disk information",
6698                                    errors.ECODE_INVAL)
6699
6700     if (not self.op.nics and
6701         einfo.has_option(constants.INISECT_INS, "nic_count")):
6702       nics = []
6703       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6704         ndict = {}
6705         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6706           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6707           ndict[name] = v
6708         nics.append(ndict)
6709       self.op.nics = nics
6710
6711     if (self.op.hypervisor is None and
6712         einfo.has_option(constants.INISECT_INS, "hypervisor")):
6713       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6714     if einfo.has_section(constants.INISECT_HYP):
6715       # use the export parameters but do not override the ones
6716       # specified by the user
6717       for name, value in einfo.items(constants.INISECT_HYP):
6718         if name not in self.op.hvparams:
6719           self.op.hvparams[name] = value
6720
6721     if einfo.has_section(constants.INISECT_BEP):
6722       # use the parameters, without overriding
6723       for name, value in einfo.items(constants.INISECT_BEP):
6724         if name not in self.op.beparams:
6725           self.op.beparams[name] = value
6726     else:
6727       # try to read the parameters old style, from the main section
6728       for name in constants.BES_PARAMETERS:
6729         if (name not in self.op.beparams and
6730             einfo.has_option(constants.INISECT_INS, name)):
6731           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6732
6733     if einfo.has_section(constants.INISECT_OSP):
6734       # use the parameters, without overriding
6735       for name, value in einfo.items(constants.INISECT_OSP):
6736         if name not in self.op.osparams:
6737           self.op.osparams[name] = value
6738
6739   def _RevertToDefaults(self, cluster):
6740     """Revert the instance parameters to the default values.
6741
6742     """
6743     # hvparams
6744     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6745     for name in self.op.hvparams.keys():
6746       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6747         del self.op.hvparams[name]
6748     # beparams
6749     be_defs = cluster.SimpleFillBE({})
6750     for name in self.op.beparams.keys():
6751       if name in be_defs and be_defs[name] == self.op.beparams[name]:
6752         del self.op.beparams[name]
6753     # nic params
6754     nic_defs = cluster.SimpleFillNIC({})
6755     for nic in self.op.nics:
6756       for name in constants.NICS_PARAMETERS:
6757         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6758           del nic[name]
6759     # osparams
6760     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6761     for name in self.op.osparams.keys():
6762       if name in os_defs and os_defs[name] == self.op.osparams[name]:
6763         del self.op.osparams[name]
6764
6765   def CheckPrereq(self):
6766     """Check prerequisites.
6767
6768     """
6769     if self.op.mode == constants.INSTANCE_IMPORT:
6770       export_info = self._ReadExportInfo()
6771       self._ReadExportParams(export_info)
6772
6773     _CheckDiskTemplate(self.op.disk_template)
6774
6775     if (not self.cfg.GetVGName() and
6776         self.op.disk_template not in constants.DTS_NOT_LVM):
6777       raise errors.OpPrereqError("Cluster does not support lvm-based"
6778                                  " instances", errors.ECODE_STATE)
6779
6780     if self.op.hypervisor is None:
6781       self.op.hypervisor = self.cfg.GetHypervisorType()
6782
6783     cluster = self.cfg.GetClusterInfo()
6784     enabled_hvs = cluster.enabled_hypervisors
6785     if self.op.hypervisor not in enabled_hvs:
6786       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6787                                  " cluster (%s)" % (self.op.hypervisor,
6788                                   ",".join(enabled_hvs)),
6789                                  errors.ECODE_STATE)
6790
6791     # check hypervisor parameter syntax (locally)
6792     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6793     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6794                                       self.op.hvparams)
6795     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6796     hv_type.CheckParameterSyntax(filled_hvp)
6797     self.hv_full = filled_hvp
6798     # check that we don't specify global parameters on an instance
6799     _CheckGlobalHvParams(self.op.hvparams)
6800
6801     # fill and remember the beparams dict
6802     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6803     self.be_full = cluster.SimpleFillBE(self.op.beparams)
6804
6805     # build os parameters
6806     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6807
6808     # now that hvp/bep are in final format, let's reset to defaults,
6809     # if told to do so
6810     if self.op.identify_defaults:
6811       self._RevertToDefaults(cluster)
6812
6813     # NIC buildup
6814     self.nics = []
6815     for idx, nic in enumerate(self.op.nics):
6816       nic_mode_req = nic.get("mode", None)
6817       nic_mode = nic_mode_req
6818       if nic_mode is None:
6819         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6820
6821       # in routed mode, for the first nic, the default ip is 'auto'
6822       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6823         default_ip_mode = constants.VALUE_AUTO
6824       else:
6825         default_ip_mode = constants.VALUE_NONE
6826
6827       # ip validity checks
6828       ip = nic.get("ip", default_ip_mode)
6829       if ip is None or ip.lower() == constants.VALUE_NONE:
6830         nic_ip = None
6831       elif ip.lower() == constants.VALUE_AUTO:
6832         if not self.op.name_check:
6833           raise errors.OpPrereqError("IP address set to auto but name checks"
6834                                      " have been skipped. Aborting.",
6835                                      errors.ECODE_INVAL)
6836         nic_ip = self.hostname1.ip
6837       else:
6838         if not utils.IsValidIP(ip):
6839           raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6840                                      " like a valid IP" % ip,
6841                                      errors.ECODE_INVAL)
6842         nic_ip = ip
6843
6844       # TODO: check the ip address for uniqueness
6845       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6846         raise errors.OpPrereqError("Routed nic mode requires an ip address",
6847                                    errors.ECODE_INVAL)
6848
6849       # MAC address verification
6850       mac = nic.get("mac", constants.VALUE_AUTO)
6851       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6852         mac = utils.NormalizeAndValidateMac(mac)
6853
6854         try:
6855           self.cfg.ReserveMAC(mac, self.proc.GetECId())
6856         except errors.ReservationError:
6857           raise errors.OpPrereqError("MAC address %s already in use"
6858                                      " in cluster" % mac,
6859                                      errors.ECODE_NOTUNIQUE)
6860
6861       # bridge verification
6862       bridge = nic.get("bridge", None)
6863       link = nic.get("link", None)
6864       if bridge and link:
6865         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6866                                    " at the same time", errors.ECODE_INVAL)
6867       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6868         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6869                                    errors.ECODE_INVAL)
6870       elif bridge:
6871         link = bridge
6872
6873       nicparams = {}
6874       if nic_mode_req:
6875         nicparams[constants.NIC_MODE] = nic_mode_req
6876       if link:
6877         nicparams[constants.NIC_LINK] = link
6878
6879       check_params = cluster.SimpleFillNIC(nicparams)
6880       objects.NIC.CheckParameterSyntax(check_params)
6881       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6882
6883     # disk checks/pre-build
6884     self.disks = []
6885     for disk in self.op.disks:
6886       mode = disk.get("mode", constants.DISK_RDWR)
6887       if mode not in constants.DISK_ACCESS_SET:
6888         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6889                                    mode, errors.ECODE_INVAL)
6890       size = disk.get("size", None)
6891       if size is None:
6892         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6893       try:
6894         size = int(size)
6895       except (TypeError, ValueError):
6896         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6897                                    errors.ECODE_INVAL)
6898       new_disk = {"size": size, "mode": mode}
6899       if "adopt" in disk:
6900         new_disk["adopt"] = disk["adopt"]
6901       self.disks.append(new_disk)
6902
6903     if self.op.mode == constants.INSTANCE_IMPORT:
6904
6905       # Check that the new instance doesn't have less disks than the export
6906       instance_disks = len(self.disks)
6907       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6908       if instance_disks < export_disks:
6909         raise errors.OpPrereqError("Not enough disks to import."
6910                                    " (instance: %d, export: %d)" %
6911                                    (instance_disks, export_disks),
6912                                    errors.ECODE_INVAL)
6913
6914       disk_images = []
6915       for idx in range(export_disks):
6916         option = 'disk%d_dump' % idx
6917         if export_info.has_option(constants.INISECT_INS, option):
6918           # FIXME: are the old os-es, disk sizes, etc. useful?
6919           export_name = export_info.get(constants.INISECT_INS, option)
6920           image = utils.PathJoin(self.op.src_path, export_name)
6921           disk_images.append(image)
6922         else:
6923           disk_images.append(False)
6924
6925       self.src_images = disk_images
6926
6927       old_name = export_info.get(constants.INISECT_INS, 'name')
6928       try:
6929         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6930       except (TypeError, ValueError), err:
6931         raise errors.OpPrereqError("Invalid export file, nic_count is not"
6932                                    " an integer: %s" % str(err),
6933                                    errors.ECODE_STATE)
6934       if self.op.instance_name == old_name:
6935         for idx, nic in enumerate(self.nics):
6936           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6937             nic_mac_ini = 'nic%d_mac' % idx
6938             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6939
6940     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6941
6942     # ip ping checks (we use the same ip that was resolved in ExpandNames)
6943     if self.op.ip_check:
6944       if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6945         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6946                                    (self.check_ip, self.op.instance_name),
6947                                    errors.ECODE_NOTUNIQUE)
6948
6949     #### mac address generation
6950     # By generating here the mac address both the allocator and the hooks get
6951     # the real final mac address rather than the 'auto' or 'generate' value.
6952     # There is a race condition between the generation and the instance object
6953     # creation, which means that we know the mac is valid now, but we're not
6954     # sure it will be when we actually add the instance. If things go bad
6955     # adding the instance will abort because of a duplicate mac, and the
6956     # creation job will fail.
6957     for nic in self.nics:
6958       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6959         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6960
6961     #### allocator run
6962
6963     if self.op.iallocator is not None:
6964       self._RunAllocator()
6965
6966     #### node related checks
6967
6968     # check primary node
6969     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6970     assert self.pnode is not None, \
6971       "Cannot retrieve locked node %s" % self.op.pnode
6972     if pnode.offline:
6973       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6974                                  pnode.name, errors.ECODE_STATE)
6975     if pnode.drained:
6976       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6977                                  pnode.name, errors.ECODE_STATE)
6978
6979     self.secondaries = []
6980
6981     # mirror node verification
6982     if self.op.disk_template in constants.DTS_NET_MIRROR:
6983       if self.op.snode is None:
6984         raise errors.OpPrereqError("The networked disk templates need"
6985                                    " a mirror node", errors.ECODE_INVAL)
6986       if self.op.snode == pnode.name:
6987         raise errors.OpPrereqError("The secondary node cannot be the"
6988                                    " primary node.", errors.ECODE_INVAL)
6989       _CheckNodeOnline(self, self.op.snode)
6990       _CheckNodeNotDrained(self, self.op.snode)
6991       self.secondaries.append(self.op.snode)
6992
6993     nodenames = [pnode.name] + self.secondaries
6994
6995     req_size = _ComputeDiskSize(self.op.disk_template,
6996                                 self.disks)
6997
6998     # Check lv size requirements, if not adopting
6999     if req_size is not None and not self.adopt_disks:
7000       _CheckNodesFreeDisk(self, nodenames, req_size)
7001
7002     if self.adopt_disks: # instead, we must check the adoption data
7003       all_lvs = set([i["adopt"] for i in self.disks])
7004       if len(all_lvs) != len(self.disks):
7005         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7006                                    errors.ECODE_INVAL)
7007       for lv_name in all_lvs:
7008         try:
7009           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7010         except errors.ReservationError:
7011           raise errors.OpPrereqError("LV named %s used by another instance" %
7012                                      lv_name, errors.ECODE_NOTUNIQUE)
7013
7014       node_lvs = self.rpc.call_lv_list([pnode.name],
7015                                        self.cfg.GetVGName())[pnode.name]
7016       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7017       node_lvs = node_lvs.payload
7018       delta = all_lvs.difference(node_lvs.keys())
7019       if delta:
7020         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7021                                    utils.CommaJoin(delta),
7022                                    errors.ECODE_INVAL)
7023       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7024       if online_lvs:
7025         raise errors.OpPrereqError("Online logical volumes found, cannot"
7026                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7027                                    errors.ECODE_STATE)
7028       # update the size of disk based on what is found
7029       for dsk in self.disks:
7030         dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7031
7032     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7033
7034     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7035     # check OS parameters (remotely)
7036     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7037
7038     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7039
7040     # memory check on primary node
7041     if self.op.start:
7042       _CheckNodeFreeMemory(self, self.pnode.name,
7043                            "creating instance %s" % self.op.instance_name,
7044                            self.be_full[constants.BE_MEMORY],
7045                            self.op.hypervisor)
7046
7047     self.dry_run_result = list(nodenames)
7048
7049   def Exec(self, feedback_fn):
7050     """Create and add the instance to the cluster.
7051
7052     """
7053     instance = self.op.instance_name
7054     pnode_name = self.pnode.name
7055
7056     ht_kind = self.op.hypervisor
7057     if ht_kind in constants.HTS_REQ_PORT:
7058       network_port = self.cfg.AllocatePort()
7059     else:
7060       network_port = None
7061
7062     if constants.ENABLE_FILE_STORAGE:
7063       # this is needed because os.path.join does not accept None arguments
7064       if self.op.file_storage_dir is None:
7065         string_file_storage_dir = ""
7066       else:
7067         string_file_storage_dir = self.op.file_storage_dir
7068
7069       # build the full file storage dir path
7070       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7071                                         string_file_storage_dir, instance)
7072     else:
7073       file_storage_dir = ""
7074
7075     disks = _GenerateDiskTemplate(self,
7076                                   self.op.disk_template,
7077                                   instance, pnode_name,
7078                                   self.secondaries,
7079                                   self.disks,
7080                                   file_storage_dir,
7081                                   self.op.file_driver,
7082                                   0)
7083
7084     iobj = objects.Instance(name=instance, os=self.op.os_type,
7085                             primary_node=pnode_name,
7086                             nics=self.nics, disks=disks,
7087                             disk_template=self.op.disk_template,
7088                             admin_up=False,
7089                             network_port=network_port,
7090                             beparams=self.op.beparams,
7091                             hvparams=self.op.hvparams,
7092                             hypervisor=self.op.hypervisor,
7093                             osparams=self.op.osparams,
7094                             )
7095
7096     if self.adopt_disks:
7097       # rename LVs to the newly-generated names; we need to construct
7098       # 'fake' LV disks with the old data, plus the new unique_id
7099       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7100       rename_to = []
7101       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7102         rename_to.append(t_dsk.logical_id)
7103         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7104         self.cfg.SetDiskID(t_dsk, pnode_name)
7105       result = self.rpc.call_blockdev_rename(pnode_name,
7106                                              zip(tmp_disks, rename_to))
7107       result.Raise("Failed to rename adoped LVs")
7108     else:
7109       feedback_fn("* creating instance disks...")
7110       try:
7111         _CreateDisks(self, iobj)
7112       except errors.OpExecError:
7113         self.LogWarning("Device creation failed, reverting...")
7114         try:
7115           _RemoveDisks(self, iobj)
7116         finally:
7117           self.cfg.ReleaseDRBDMinors(instance)
7118           raise
7119
7120     feedback_fn("adding instance %s to cluster config" % instance)
7121
7122     self.cfg.AddInstance(iobj, self.proc.GetECId())
7123
7124     # Declare that we don't want to remove the instance lock anymore, as we've
7125     # added the instance to the config
7126     del self.remove_locks[locking.LEVEL_INSTANCE]
7127     # Unlock all the nodes
7128     if self.op.mode == constants.INSTANCE_IMPORT:
7129       nodes_keep = [self.op.src_node]
7130       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7131                        if node != self.op.src_node]
7132       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7133       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7134     else:
7135       self.context.glm.release(locking.LEVEL_NODE)
7136       del self.acquired_locks[locking.LEVEL_NODE]
7137
7138     if self.op.wait_for_sync:
7139       disk_abort = not _WaitForSync(self, iobj)
7140     elif iobj.disk_template in constants.DTS_NET_MIRROR:
7141       # make sure the disks are not degraded (still sync-ing is ok)
7142       time.sleep(15)
7143       feedback_fn("* checking mirrors status")
7144       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7145     else:
7146       disk_abort = False
7147
7148     if disk_abort:
7149       _RemoveDisks(self, iobj)
7150       self.cfg.RemoveInstance(iobj.name)
7151       # Make sure the instance lock gets removed
7152       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7153       raise errors.OpExecError("There are some degraded disks for"
7154                                " this instance")
7155
7156     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7157       if self.op.mode == constants.INSTANCE_CREATE:
7158         if not self.op.no_install:
7159           feedback_fn("* running the instance OS create scripts...")
7160           # FIXME: pass debug option from opcode to backend
7161           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7162                                                  self.op.debug_level)
7163           result.Raise("Could not add os for instance %s"
7164                        " on node %s" % (instance, pnode_name))
7165
7166       elif self.op.mode == constants.INSTANCE_IMPORT:
7167         feedback_fn("* running the instance OS import scripts...")
7168
7169         transfers = []
7170
7171         for idx, image in enumerate(self.src_images):
7172           if not image:
7173             continue
7174
7175           # FIXME: pass debug option from opcode to backend
7176           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7177                                              constants.IEIO_FILE, (image, ),
7178                                              constants.IEIO_SCRIPT,
7179                                              (iobj.disks[idx], idx),
7180                                              None)
7181           transfers.append(dt)
7182
7183         import_result = \
7184           masterd.instance.TransferInstanceData(self, feedback_fn,
7185                                                 self.op.src_node, pnode_name,
7186                                                 self.pnode.secondary_ip,
7187                                                 iobj, transfers)
7188         if not compat.all(import_result):
7189           self.LogWarning("Some disks for instance %s on node %s were not"
7190                           " imported successfully" % (instance, pnode_name))
7191
7192       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7193         feedback_fn("* preparing remote import...")
7194         connect_timeout = constants.RIE_CONNECT_TIMEOUT
7195         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7196
7197         disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7198                                                      self.source_x509_ca,
7199                                                      self._cds, timeouts)
7200         if not compat.all(disk_results):
7201           # TODO: Should the instance still be started, even if some disks
7202           # failed to import (valid for local imports, too)?
7203           self.LogWarning("Some disks for instance %s on node %s were not"
7204                           " imported successfully" % (instance, pnode_name))
7205
7206         # Run rename script on newly imported instance
7207         assert iobj.name == instance
7208         feedback_fn("Running rename script for %s" % instance)
7209         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7210                                                    self.source_instance_name,
7211                                                    self.op.debug_level)
7212         if result.fail_msg:
7213           self.LogWarning("Failed to run rename script for %s on node"
7214                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7215
7216       else:
7217         # also checked in the prereq part
7218         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7219                                      % self.op.mode)
7220
7221     if self.op.start:
7222       iobj.admin_up = True
7223       self.cfg.Update(iobj, feedback_fn)
7224       logging.info("Starting instance %s on node %s", instance, pnode_name)
7225       feedback_fn("* starting instance...")
7226       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7227       result.Raise("Could not start instance")
7228
7229     return list(iobj.all_nodes)
7230
7231
7232 class LUConnectConsole(NoHooksLU):
7233   """Connect to an instance's console.
7234
7235   This is somewhat special in that it returns the command line that
7236   you need to run on the master node in order to connect to the
7237   console.
7238
7239   """
7240   _OP_REQP = [("instance_name", _TNonEmptyString)]
7241   REQ_BGL = False
7242
7243   def ExpandNames(self):
7244     self._ExpandAndLockInstance()
7245
7246   def CheckPrereq(self):
7247     """Check prerequisites.
7248
7249     This checks that the instance is in the cluster.
7250
7251     """
7252     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7253     assert self.instance is not None, \
7254       "Cannot retrieve locked instance %s" % self.op.instance_name
7255     _CheckNodeOnline(self, self.instance.primary_node)
7256
7257   def Exec(self, feedback_fn):
7258     """Connect to the console of an instance
7259
7260     """
7261     instance = self.instance
7262     node = instance.primary_node
7263
7264     node_insts = self.rpc.call_instance_list([node],
7265                                              [instance.hypervisor])[node]
7266     node_insts.Raise("Can't get node information from %s" % node)
7267
7268     if instance.name not in node_insts.payload:
7269       raise errors.OpExecError("Instance %s is not running." % instance.name)
7270
7271     logging.debug("Connecting to console of %s on %s", instance.name, node)
7272
7273     hyper = hypervisor.GetHypervisor(instance.hypervisor)
7274     cluster = self.cfg.GetClusterInfo()
7275     # beparams and hvparams are passed separately, to avoid editing the
7276     # instance and then saving the defaults in the instance itself.
7277     hvparams = cluster.FillHV(instance)
7278     beparams = cluster.FillBE(instance)
7279     console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7280
7281     # build ssh cmdline
7282     return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7283
7284
7285 class LUReplaceDisks(LogicalUnit):
7286   """Replace the disks of an instance.
7287
7288   """
7289   HPATH = "mirrors-replace"
7290   HTYPE = constants.HTYPE_INSTANCE
7291   _OP_REQP = [
7292     ("instance_name", _TNonEmptyString),
7293     ("mode", _TElemOf(constants.REPLACE_MODES)),
7294     ("disks", _TListOf(_TPositiveInt)),
7295     ]
7296   _OP_DEFS = [
7297     ("remote_node", None),
7298     ("iallocator", None),
7299     ("early_release", None),
7300     ]
7301   REQ_BGL = False
7302
7303   def CheckArguments(self):
7304     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7305                                   self.op.iallocator)
7306
7307   def ExpandNames(self):
7308     self._ExpandAndLockInstance()
7309
7310     if self.op.iallocator is not None:
7311       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7312
7313     elif self.op.remote_node is not None:
7314       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7315       self.op.remote_node = remote_node
7316
7317       # Warning: do not remove the locking of the new secondary here
7318       # unless DRBD8.AddChildren is changed to work in parallel;
7319       # currently it doesn't since parallel invocations of
7320       # FindUnusedMinor will conflict
7321       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7322       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7323
7324     else:
7325       self.needed_locks[locking.LEVEL_NODE] = []
7326       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7327
7328     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7329                                    self.op.iallocator, self.op.remote_node,
7330                                    self.op.disks, False, self.op.early_release)
7331
7332     self.tasklets = [self.replacer]
7333
7334   def DeclareLocks(self, level):
7335     # If we're not already locking all nodes in the set we have to declare the
7336     # instance's primary/secondary nodes.
7337     if (level == locking.LEVEL_NODE and
7338         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7339       self._LockInstancesNodes()
7340
7341   def BuildHooksEnv(self):
7342     """Build hooks env.
7343
7344     This runs on the master, the primary and all the secondaries.
7345
7346     """
7347     instance = self.replacer.instance
7348     env = {
7349       "MODE": self.op.mode,
7350       "NEW_SECONDARY": self.op.remote_node,
7351       "OLD_SECONDARY": instance.secondary_nodes[0],
7352       }
7353     env.update(_BuildInstanceHookEnvByObject(self, instance))
7354     nl = [
7355       self.cfg.GetMasterNode(),
7356       instance.primary_node,
7357       ]
7358     if self.op.remote_node is not None:
7359       nl.append(self.op.remote_node)
7360     return env, nl, nl
7361
7362
7363 class TLReplaceDisks(Tasklet):
7364   """Replaces disks for an instance.
7365
7366   Note: Locking is not within the scope of this class.
7367
7368   """
7369   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7370                disks, delay_iallocator, early_release):
7371     """Initializes this class.
7372
7373     """
7374     Tasklet.__init__(self, lu)
7375
7376     # Parameters
7377     self.instance_name = instance_name
7378     self.mode = mode
7379     self.iallocator_name = iallocator_name
7380     self.remote_node = remote_node
7381     self.disks = disks
7382     self.delay_iallocator = delay_iallocator
7383     self.early_release = early_release
7384
7385     # Runtime data
7386     self.instance = None
7387     self.new_node = None
7388     self.target_node = None
7389     self.other_node = None
7390     self.remote_node_info = None
7391     self.node_secondary_ip = None
7392
7393   @staticmethod
7394   def CheckArguments(mode, remote_node, iallocator):
7395     """Helper function for users of this class.
7396
7397     """
7398     # check for valid parameter combination
7399     if mode == constants.REPLACE_DISK_CHG:
7400       if remote_node is None and iallocator is None:
7401         raise errors.OpPrereqError("When changing the secondary either an"
7402                                    " iallocator script must be used or the"
7403                                    " new node given", errors.ECODE_INVAL)
7404
7405       if remote_node is not None and iallocator is not None:
7406         raise errors.OpPrereqError("Give either the iallocator or the new"
7407                                    " secondary, not both", errors.ECODE_INVAL)
7408
7409     elif remote_node is not None or iallocator is not None:
7410       # Not replacing the secondary
7411       raise errors.OpPrereqError("The iallocator and new node options can"
7412                                  " only be used when changing the"
7413                                  " secondary node", errors.ECODE_INVAL)
7414
7415   @staticmethod
7416   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7417     """Compute a new secondary node using an IAllocator.
7418
7419     """
7420     ial = IAllocator(lu.cfg, lu.rpc,
7421                      mode=constants.IALLOCATOR_MODE_RELOC,
7422                      name=instance_name,
7423                      relocate_from=relocate_from)
7424
7425     ial.Run(iallocator_name)
7426
7427     if not ial.success:
7428       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7429                                  " %s" % (iallocator_name, ial.info),
7430                                  errors.ECODE_NORES)
7431
7432     if len(ial.result) != ial.required_nodes:
7433       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7434                                  " of nodes (%s), required %s" %
7435                                  (iallocator_name,
7436                                   len(ial.result), ial.required_nodes),
7437                                  errors.ECODE_FAULT)
7438
7439     remote_node_name = ial.result[0]
7440
7441     lu.LogInfo("Selected new secondary for instance '%s': %s",
7442                instance_name, remote_node_name)
7443
7444     return remote_node_name
7445
7446   def _FindFaultyDisks(self, node_name):
7447     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7448                                     node_name, True)
7449
7450   def CheckPrereq(self):
7451     """Check prerequisites.
7452
7453     This checks that the instance is in the cluster.
7454
7455     """
7456     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7457     assert instance is not None, \
7458       "Cannot retrieve locked instance %s" % self.instance_name
7459
7460     if instance.disk_template != constants.DT_DRBD8:
7461       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7462                                  " instances", errors.ECODE_INVAL)
7463
7464     if len(instance.secondary_nodes) != 1:
7465       raise errors.OpPrereqError("The instance has a strange layout,"
7466                                  " expected one secondary but found %d" %
7467                                  len(instance.secondary_nodes),
7468                                  errors.ECODE_FAULT)
7469
7470     if not self.delay_iallocator:
7471       self._CheckPrereq2()
7472
7473   def _CheckPrereq2(self):
7474     """Check prerequisites, second part.
7475
7476     This function should always be part of CheckPrereq. It was separated and is
7477     now called from Exec because during node evacuation iallocator was only
7478     called with an unmodified cluster model, not taking planned changes into
7479     account.
7480
7481     """
7482     instance = self.instance
7483     secondary_node = instance.secondary_nodes[0]
7484
7485     if self.iallocator_name is None:
7486       remote_node = self.remote_node
7487     else:
7488       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7489                                        instance.name, instance.secondary_nodes)
7490
7491     if remote_node is not None:
7492       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7493       assert self.remote_node_info is not None, \
7494         "Cannot retrieve locked node %s" % remote_node
7495     else:
7496       self.remote_node_info = None
7497
7498     if remote_node == self.instance.primary_node:
7499       raise errors.OpPrereqError("The specified node is the primary node of"
7500                                  " the instance.", errors.ECODE_INVAL)
7501
7502     if remote_node == secondary_node:
7503       raise errors.OpPrereqError("The specified node is already the"
7504                                  " secondary node of the instance.",
7505                                  errors.ECODE_INVAL)
7506
7507     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7508                                     constants.REPLACE_DISK_CHG):
7509       raise errors.OpPrereqError("Cannot specify disks to be replaced",
7510                                  errors.ECODE_INVAL)
7511
7512     if self.mode == constants.REPLACE_DISK_AUTO:
7513       faulty_primary = self._FindFaultyDisks(instance.primary_node)
7514       faulty_secondary = self._FindFaultyDisks(secondary_node)
7515
7516       if faulty_primary and faulty_secondary:
7517         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7518                                    " one node and can not be repaired"
7519                                    " automatically" % self.instance_name,
7520                                    errors.ECODE_STATE)
7521
7522       if faulty_primary:
7523         self.disks = faulty_primary
7524         self.target_node = instance.primary_node
7525         self.other_node = secondary_node
7526         check_nodes = [self.target_node, self.other_node]
7527       elif faulty_secondary:
7528         self.disks = faulty_secondary
7529         self.target_node = secondary_node
7530         self.other_node = instance.primary_node
7531         check_nodes = [self.target_node, self.other_node]
7532       else:
7533         self.disks = []
7534         check_nodes = []
7535
7536     else:
7537       # Non-automatic modes
7538       if self.mode == constants.REPLACE_DISK_PRI:
7539         self.target_node = instance.primary_node
7540         self.other_node = secondary_node
7541         check_nodes = [self.target_node, self.other_node]
7542
7543       elif self.mode == constants.REPLACE_DISK_SEC:
7544         self.target_node = secondary_node
7545         self.other_node = instance.primary_node
7546         check_nodes = [self.target_node, self.other_node]
7547
7548       elif self.mode == constants.REPLACE_DISK_CHG:
7549         self.new_node = remote_node
7550         self.other_node = instance.primary_node
7551         self.target_node = secondary_node
7552         check_nodes = [self.new_node, self.other_node]
7553
7554         _CheckNodeNotDrained(self.lu, remote_node)
7555
7556         old_node_info = self.cfg.GetNodeInfo(secondary_node)
7557         assert old_node_info is not None
7558         if old_node_info.offline and not self.early_release:
7559           # doesn't make sense to delay the release
7560           self.early_release = True
7561           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7562                           " early-release mode", secondary_node)
7563
7564       else:
7565         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7566                                      self.mode)
7567
7568       # If not specified all disks should be replaced
7569       if not self.disks:
7570         self.disks = range(len(self.instance.disks))
7571
7572     for node in check_nodes:
7573       _CheckNodeOnline(self.lu, node)
7574
7575     # Check whether disks are valid
7576     for disk_idx in self.disks:
7577       instance.FindDisk(disk_idx)
7578
7579     # Get secondary node IP addresses
7580     node_2nd_ip = {}
7581
7582     for node_name in [self.target_node, self.other_node, self.new_node]:
7583       if node_name is not None:
7584         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7585
7586     self.node_secondary_ip = node_2nd_ip
7587
7588   def Exec(self, feedback_fn):
7589     """Execute disk replacement.
7590
7591     This dispatches the disk replacement to the appropriate handler.
7592
7593     """
7594     if self.delay_iallocator:
7595       self._CheckPrereq2()
7596
7597     if not self.disks:
7598       feedback_fn("No disks need replacement")
7599       return
7600
7601     feedback_fn("Replacing disk(s) %s for %s" %
7602                 (utils.CommaJoin(self.disks), self.instance.name))
7603
7604     activate_disks = (not self.instance.admin_up)
7605
7606     # Activate the instance disks if we're replacing them on a down instance
7607     if activate_disks:
7608       _StartInstanceDisks(self.lu, self.instance, True)
7609
7610     try:
7611       # Should we replace the secondary node?
7612       if self.new_node is not None:
7613         fn = self._ExecDrbd8Secondary
7614       else:
7615         fn = self._ExecDrbd8DiskOnly
7616
7617       return fn(feedback_fn)
7618
7619     finally:
7620       # Deactivate the instance disks if we're replacing them on a
7621       # down instance
7622       if activate_disks:
7623         _SafeShutdownInstanceDisks(self.lu, self.instance)
7624
7625   def _CheckVolumeGroup(self, nodes):
7626     self.lu.LogInfo("Checking volume groups")
7627
7628     vgname = self.cfg.GetVGName()
7629
7630     # Make sure volume group exists on all involved nodes
7631     results = self.rpc.call_vg_list(nodes)
7632     if not results:
7633       raise errors.OpExecError("Can't list volume groups on the nodes")
7634
7635     for node in nodes:
7636       res = results[node]
7637       res.Raise("Error checking node %s" % node)
7638       if vgname not in res.payload:
7639         raise errors.OpExecError("Volume group '%s' not found on node %s" %
7640                                  (vgname, node))
7641
7642   def _CheckDisksExistence(self, nodes):
7643     # Check disk existence
7644     for idx, dev in enumerate(self.instance.disks):
7645       if idx not in self.disks:
7646         continue
7647
7648       for node in nodes:
7649         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7650         self.cfg.SetDiskID(dev, node)
7651
7652         result = self.rpc.call_blockdev_find(node, dev)
7653
7654         msg = result.fail_msg
7655         if msg or not result.payload:
7656           if not msg:
7657             msg = "disk not found"
7658           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7659                                    (idx, node, msg))
7660
7661   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7662     for idx, dev in enumerate(self.instance.disks):
7663       if idx not in self.disks:
7664         continue
7665
7666       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7667                       (idx, node_name))
7668
7669       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7670                                    ldisk=ldisk):
7671         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7672                                  " replace disks for instance %s" %
7673                                  (node_name, self.instance.name))
7674
7675   def _CreateNewStorage(self, node_name):
7676     vgname = self.cfg.GetVGName()
7677     iv_names = {}
7678
7679     for idx, dev in enumerate(self.instance.disks):
7680       if idx not in self.disks:
7681         continue
7682
7683       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7684
7685       self.cfg.SetDiskID(dev, node_name)
7686
7687       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7688       names = _GenerateUniqueNames(self.lu, lv_names)
7689
7690       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7691                              logical_id=(vgname, names[0]))
7692       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7693                              logical_id=(vgname, names[1]))
7694
7695       new_lvs = [lv_data, lv_meta]
7696       old_lvs = dev.children
7697       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7698
7699       # we pass force_create=True to force the LVM creation
7700       for new_lv in new_lvs:
7701         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7702                         _GetInstanceInfoText(self.instance), False)
7703
7704     return iv_names
7705
7706   def _CheckDevices(self, node_name, iv_names):
7707     for name, (dev, _, _) in iv_names.iteritems():
7708       self.cfg.SetDiskID(dev, node_name)
7709
7710       result = self.rpc.call_blockdev_find(node_name, dev)
7711
7712       msg = result.fail_msg
7713       if msg or not result.payload:
7714         if not msg:
7715           msg = "disk not found"
7716         raise errors.OpExecError("Can't find DRBD device %s: %s" %
7717                                  (name, msg))
7718
7719       if result.payload.is_degraded:
7720         raise errors.OpExecError("DRBD device %s is degraded!" % name)
7721
7722   def _RemoveOldStorage(self, node_name, iv_names):
7723     for name, (_, old_lvs, _) in iv_names.iteritems():
7724       self.lu.LogInfo("Remove logical volumes for %s" % name)
7725
7726       for lv in old_lvs:
7727         self.cfg.SetDiskID(lv, node_name)
7728
7729         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7730         if msg:
7731           self.lu.LogWarning("Can't remove old LV: %s" % msg,
7732                              hint="remove unused LVs manually")
7733
7734   def _ReleaseNodeLock(self, node_name):
7735     """Releases the lock for a given node."""
7736     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7737
7738   def _ExecDrbd8DiskOnly(self, feedback_fn):
7739     """Replace a disk on the primary or secondary for DRBD 8.
7740
7741     The algorithm for replace is quite complicated:
7742
7743       1. for each disk to be replaced:
7744
7745         1. create new LVs on the target node with unique names
7746         1. detach old LVs from the drbd device
7747         1. rename old LVs to name_replaced.<time_t>
7748         1. rename new LVs to old LVs
7749         1. attach the new LVs (with the old names now) to the drbd device
7750
7751       1. wait for sync across all devices
7752
7753       1. for each modified disk:
7754
7755         1. remove old LVs (which have the name name_replaces.<time_t>)
7756
7757     Failures are not very well handled.
7758
7759     """
7760     steps_total = 6
7761
7762     # Step: check device activation
7763     self.lu.LogStep(1, steps_total, "Check device existence")
7764     self._CheckDisksExistence([self.other_node, self.target_node])
7765     self._CheckVolumeGroup([self.target_node, self.other_node])
7766
7767     # Step: check other node consistency
7768     self.lu.LogStep(2, steps_total, "Check peer consistency")
7769     self._CheckDisksConsistency(self.other_node,
7770                                 self.other_node == self.instance.primary_node,
7771                                 False)
7772
7773     # Step: create new storage
7774     self.lu.LogStep(3, steps_total, "Allocate new storage")
7775     iv_names = self._CreateNewStorage(self.target_node)
7776
7777     # Step: for each lv, detach+rename*2+attach
7778     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7779     for dev, old_lvs, new_lvs in iv_names.itervalues():
7780       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7781
7782       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7783                                                      old_lvs)
7784       result.Raise("Can't detach drbd from local storage on node"
7785                    " %s for device %s" % (self.target_node, dev.iv_name))
7786       #dev.children = []
7787       #cfg.Update(instance)
7788
7789       # ok, we created the new LVs, so now we know we have the needed
7790       # storage; as such, we proceed on the target node to rename
7791       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7792       # using the assumption that logical_id == physical_id (which in
7793       # turn is the unique_id on that node)
7794
7795       # FIXME(iustin): use a better name for the replaced LVs
7796       temp_suffix = int(time.time())
7797       ren_fn = lambda d, suff: (d.physical_id[0],
7798                                 d.physical_id[1] + "_replaced-%s" % suff)
7799
7800       # Build the rename list based on what LVs exist on the node
7801       rename_old_to_new = []
7802       for to_ren in old_lvs:
7803         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7804         if not result.fail_msg and result.payload:
7805           # device exists
7806           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7807
7808       self.lu.LogInfo("Renaming the old LVs on the target node")
7809       result = self.rpc.call_blockdev_rename(self.target_node,
7810                                              rename_old_to_new)
7811       result.Raise("Can't rename old LVs on node %s" % self.target_node)
7812
7813       # Now we rename the new LVs to the old LVs
7814       self.lu.LogInfo("Renaming the new LVs on the target node")
7815       rename_new_to_old = [(new, old.physical_id)
7816                            for old, new in zip(old_lvs, new_lvs)]
7817       result = self.rpc.call_blockdev_rename(self.target_node,
7818                                              rename_new_to_old)
7819       result.Raise("Can't rename new LVs on node %s" % self.target_node)
7820
7821       for old, new in zip(old_lvs, new_lvs):
7822         new.logical_id = old.logical_id
7823         self.cfg.SetDiskID(new, self.target_node)
7824
7825       for disk in old_lvs:
7826         disk.logical_id = ren_fn(disk, temp_suffix)
7827         self.cfg.SetDiskID(disk, self.target_node)
7828
7829       # Now that the new lvs have the old name, we can add them to the device
7830       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7831       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7832                                                   new_lvs)
7833       msg = result.fail_msg
7834       if msg:
7835         for new_lv in new_lvs:
7836           msg2 = self.rpc.call_blockdev_remove(self.target_node,
7837                                                new_lv).fail_msg
7838           if msg2:
7839             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7840                                hint=("cleanup manually the unused logical"
7841                                      "volumes"))
7842         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7843
7844       dev.children = new_lvs
7845
7846       self.cfg.Update(self.instance, feedback_fn)
7847
7848     cstep = 5
7849     if self.early_release:
7850       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7851       cstep += 1
7852       self._RemoveOldStorage(self.target_node, iv_names)
7853       # WARNING: we release both node locks here, do not do other RPCs
7854       # than WaitForSync to the primary node
7855       self._ReleaseNodeLock([self.target_node, self.other_node])
7856
7857     # Wait for sync
7858     # This can fail as the old devices are degraded and _WaitForSync
7859     # does a combined result over all disks, so we don't check its return value
7860     self.lu.LogStep(cstep, steps_total, "Sync devices")
7861     cstep += 1
7862     _WaitForSync(self.lu, self.instance)
7863
7864     # Check all devices manually
7865     self._CheckDevices(self.instance.primary_node, iv_names)
7866
7867     # Step: remove old storage
7868     if not self.early_release:
7869       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7870       cstep += 1
7871       self._RemoveOldStorage(self.target_node, iv_names)
7872
7873   def _ExecDrbd8Secondary(self, feedback_fn):
7874     """Replace the secondary node for DRBD 8.
7875
7876     The algorithm for replace is quite complicated:
7877       - for all disks of the instance:
7878         - create new LVs on the new node with same names
7879         - shutdown the drbd device on the old secondary
7880         - disconnect the drbd network on the primary
7881         - create the drbd device on the new secondary
7882         - network attach the drbd on the primary, using an artifice:
7883           the drbd code for Attach() will connect to the network if it
7884           finds a device which is connected to the good local disks but
7885           not network enabled
7886       - wait for sync across all devices
7887       - remove all disks from the old secondary
7888
7889     Failures are not very well handled.
7890
7891     """
7892     steps_total = 6
7893
7894     # Step: check device activation
7895     self.lu.LogStep(1, steps_total, "Check device existence")
7896     self._CheckDisksExistence([self.instance.primary_node])
7897     self._CheckVolumeGroup([self.instance.primary_node])
7898
7899     # Step: check other node consistency
7900     self.lu.LogStep(2, steps_total, "Check peer consistency")
7901     self._CheckDisksConsistency(self.instance.primary_node, True, True)
7902
7903     # Step: create new storage
7904     self.lu.LogStep(3, steps_total, "Allocate new storage")
7905     for idx, dev in enumerate(self.instance.disks):
7906       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7907                       (self.new_node, idx))
7908       # we pass force_create=True to force LVM creation
7909       for new_lv in dev.children:
7910         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7911                         _GetInstanceInfoText(self.instance), False)
7912
7913     # Step 4: dbrd minors and drbd setups changes
7914     # after this, we must manually remove the drbd minors on both the
7915     # error and the success paths
7916     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7917     minors = self.cfg.AllocateDRBDMinor([self.new_node
7918                                          for dev in self.instance.disks],
7919                                         self.instance.name)
7920     logging.debug("Allocated minors %r", minors)
7921
7922     iv_names = {}
7923     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7924       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7925                       (self.new_node, idx))
7926       # create new devices on new_node; note that we create two IDs:
7927       # one without port, so the drbd will be activated without
7928       # networking information on the new node at this stage, and one
7929       # with network, for the latter activation in step 4
7930       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7931       if self.instance.primary_node == o_node1:
7932         p_minor = o_minor1
7933       else:
7934         assert self.instance.primary_node == o_node2, "Three-node instance?"
7935         p_minor = o_minor2
7936
7937       new_alone_id = (self.instance.primary_node, self.new_node, None,
7938                       p_minor, new_minor, o_secret)
7939       new_net_id = (self.instance.primary_node, self.new_node, o_port,
7940                     p_minor, new_minor, o_secret)
7941
7942       iv_names[idx] = (dev, dev.children, new_net_id)
7943       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7944                     new_net_id)
7945       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7946                               logical_id=new_alone_id,
7947                               children=dev.children,
7948                               size=dev.size)
7949       try:
7950         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7951                               _GetInstanceInfoText(self.instance), False)
7952       except errors.GenericError:
7953         self.cfg.ReleaseDRBDMinors(self.instance.name)
7954         raise
7955
7956     # We have new devices, shutdown the drbd on the old secondary
7957     for idx, dev in enumerate(self.instance.disks):
7958       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7959       self.cfg.SetDiskID(dev, self.target_node)
7960       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7961       if msg:
7962         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7963                            "node: %s" % (idx, msg),
7964                            hint=("Please cleanup this device manually as"
7965                                  " soon as possible"))
7966
7967     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7968     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7969                                                self.node_secondary_ip,
7970                                                self.instance.disks)\
7971                                               [self.instance.primary_node]
7972
7973     msg = result.fail_msg
7974     if msg:
7975       # detaches didn't succeed (unlikely)
7976       self.cfg.ReleaseDRBDMinors(self.instance.name)
7977       raise errors.OpExecError("Can't detach the disks from the network on"
7978                                " old node: %s" % (msg,))
7979
7980     # if we managed to detach at least one, we update all the disks of
7981     # the instance to point to the new secondary
7982     self.lu.LogInfo("Updating instance configuration")
7983     for dev, _, new_logical_id in iv_names.itervalues():
7984       dev.logical_id = new_logical_id
7985       self.cfg.SetDiskID(dev, self.instance.primary_node)
7986
7987     self.cfg.Update(self.instance, feedback_fn)
7988
7989     # and now perform the drbd attach
7990     self.lu.LogInfo("Attaching primary drbds to new secondary"
7991                     " (standalone => connected)")
7992     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7993                                             self.new_node],
7994                                            self.node_secondary_ip,
7995                                            self.instance.disks,
7996                                            self.instance.name,
7997                                            False)
7998     for to_node, to_result in result.items():
7999       msg = to_result.fail_msg
8000       if msg:
8001         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8002                            to_node, msg,
8003                            hint=("please do a gnt-instance info to see the"
8004                                  " status of disks"))
8005     cstep = 5
8006     if self.early_release:
8007       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8008       cstep += 1
8009       self._RemoveOldStorage(self.target_node, iv_names)
8010       # WARNING: we release all node locks here, do not do other RPCs
8011       # than WaitForSync to the primary node
8012       self._ReleaseNodeLock([self.instance.primary_node,
8013                              self.target_node,
8014                              self.new_node])
8015
8016     # Wait for sync
8017     # This can fail as the old devices are degraded and _WaitForSync
8018     # does a combined result over all disks, so we don't check its return value
8019     self.lu.LogStep(cstep, steps_total, "Sync devices")
8020     cstep += 1
8021     _WaitForSync(self.lu, self.instance)
8022
8023     # Check all devices manually
8024     self._CheckDevices(self.instance.primary_node, iv_names)
8025
8026     # Step: remove old storage
8027     if not self.early_release:
8028       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8029       self._RemoveOldStorage(self.target_node, iv_names)
8030
8031
8032 class LURepairNodeStorage(NoHooksLU):
8033   """Repairs the volume group on a node.
8034
8035   """
8036   _OP_REQP = [("node_name", _TNonEmptyString)]
8037   REQ_BGL = False
8038
8039   def CheckArguments(self):
8040     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8041
8042     _CheckStorageType(self.op.storage_type)
8043
8044     storage_type = self.op.storage_type
8045
8046     if (constants.SO_FIX_CONSISTENCY not in
8047         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8048       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8049                                  " repaired" % storage_type,
8050                                  errors.ECODE_INVAL)
8051
8052   def ExpandNames(self):
8053     self.needed_locks = {
8054       locking.LEVEL_NODE: [self.op.node_name],
8055       }
8056
8057   def _CheckFaultyDisks(self, instance, node_name):
8058     """Ensure faulty disks abort the opcode or at least warn."""
8059     try:
8060       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8061                                   node_name, True):
8062         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8063                                    " node '%s'" % (instance.name, node_name),
8064                                    errors.ECODE_STATE)
8065     except errors.OpPrereqError, err:
8066       if self.op.ignore_consistency:
8067         self.proc.LogWarning(str(err.args[0]))
8068       else:
8069         raise
8070
8071   def CheckPrereq(self):
8072     """Check prerequisites.
8073
8074     """
8075     # Check whether any instance on this node has faulty disks
8076     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8077       if not inst.admin_up:
8078         continue
8079       check_nodes = set(inst.all_nodes)
8080       check_nodes.discard(self.op.node_name)
8081       for inst_node_name in check_nodes:
8082         self._CheckFaultyDisks(inst, inst_node_name)
8083
8084   def Exec(self, feedback_fn):
8085     feedback_fn("Repairing storage unit '%s' on %s ..." %
8086                 (self.op.name, self.op.node_name))
8087
8088     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8089     result = self.rpc.call_storage_execute(self.op.node_name,
8090                                            self.op.storage_type, st_args,
8091                                            self.op.name,
8092                                            constants.SO_FIX_CONSISTENCY)
8093     result.Raise("Failed to repair storage unit '%s' on %s" %
8094                  (self.op.name, self.op.node_name))
8095
8096
8097 class LUNodeEvacuationStrategy(NoHooksLU):
8098   """Computes the node evacuation strategy.
8099
8100   """
8101   _OP_REQP = [("nodes", _TListOf(_TNonEmptyString))]
8102   _OP_DEFS = [
8103     ("remote_node", None),
8104     ("iallocator", None),
8105     ]
8106   REQ_BGL = False
8107
8108   def CheckArguments(self):
8109     if self.op.remote_node is not None and self.op.iallocator is not None:
8110       raise errors.OpPrereqError("Give either the iallocator or the new"
8111                                  " secondary, not both", errors.ECODE_INVAL)
8112
8113   def ExpandNames(self):
8114     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8115     self.needed_locks = locks = {}
8116     if self.op.remote_node is None:
8117       locks[locking.LEVEL_NODE] = locking.ALL_SET
8118     else:
8119       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8120       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8121
8122   def Exec(self, feedback_fn):
8123     if self.op.remote_node is not None:
8124       instances = []
8125       for node in self.op.nodes:
8126         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8127       result = []
8128       for i in instances:
8129         if i.primary_node == self.op.remote_node:
8130           raise errors.OpPrereqError("Node %s is the primary node of"
8131                                      " instance %s, cannot use it as"
8132                                      " secondary" %
8133                                      (self.op.remote_node, i.name),
8134                                      errors.ECODE_INVAL)
8135         result.append([i.name, self.op.remote_node])
8136     else:
8137       ial = IAllocator(self.cfg, self.rpc,
8138                        mode=constants.IALLOCATOR_MODE_MEVAC,
8139                        evac_nodes=self.op.nodes)
8140       ial.Run(self.op.iallocator, validate=True)
8141       if not ial.success:
8142         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8143                                  errors.ECODE_NORES)
8144       result = ial.result
8145     return result
8146
8147
8148 class LUGrowDisk(LogicalUnit):
8149   """Grow a disk of an instance.
8150
8151   """
8152   HPATH = "disk-grow"
8153   HTYPE = constants.HTYPE_INSTANCE
8154   _OP_REQP = [
8155     ("instance_name", _TNonEmptyString),
8156     ("disk", _TInt),
8157     ("amount", _TInt),
8158     ("wait_for_sync", _TBool),
8159     ]
8160   REQ_BGL = False
8161
8162   def ExpandNames(self):
8163     self._ExpandAndLockInstance()
8164     self.needed_locks[locking.LEVEL_NODE] = []
8165     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8166
8167   def DeclareLocks(self, level):
8168     if level == locking.LEVEL_NODE:
8169       self._LockInstancesNodes()
8170
8171   def BuildHooksEnv(self):
8172     """Build hooks env.
8173
8174     This runs on the master, the primary and all the secondaries.
8175
8176     """
8177     env = {
8178       "DISK": self.op.disk,
8179       "AMOUNT": self.op.amount,
8180       }
8181     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8182     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8183     return env, nl, nl
8184
8185   def CheckPrereq(self):
8186     """Check prerequisites.
8187
8188     This checks that the instance is in the cluster.
8189
8190     """
8191     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8192     assert instance is not None, \
8193       "Cannot retrieve locked instance %s" % self.op.instance_name
8194     nodenames = list(instance.all_nodes)
8195     for node in nodenames:
8196       _CheckNodeOnline(self, node)
8197
8198     self.instance = instance
8199
8200     if instance.disk_template not in constants.DTS_GROWABLE:
8201       raise errors.OpPrereqError("Instance's disk layout does not support"
8202                                  " growing.", errors.ECODE_INVAL)
8203
8204     self.disk = instance.FindDisk(self.op.disk)
8205
8206     if instance.disk_template != constants.DT_FILE:
8207       # TODO: check the free disk space for file, when that feature will be
8208       # supported
8209       _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8210
8211   def Exec(self, feedback_fn):
8212     """Execute disk grow.
8213
8214     """
8215     instance = self.instance
8216     disk = self.disk
8217
8218     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8219     if not disks_ok:
8220       raise errors.OpExecError("Cannot activate block device to grow")
8221
8222     for node in instance.all_nodes:
8223       self.cfg.SetDiskID(disk, node)
8224       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8225       result.Raise("Grow request failed to node %s" % node)
8226
8227       # TODO: Rewrite code to work properly
8228       # DRBD goes into sync mode for a short amount of time after executing the
8229       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8230       # calling "resize" in sync mode fails. Sleeping for a short amount of
8231       # time is a work-around.
8232       time.sleep(5)
8233
8234     disk.RecordGrow(self.op.amount)
8235     self.cfg.Update(instance, feedback_fn)
8236     if self.op.wait_for_sync:
8237       disk_abort = not _WaitForSync(self, instance, disks=[disk])
8238       if disk_abort:
8239         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8240                              " status.\nPlease check the instance.")
8241       if not instance.admin_up:
8242         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8243     elif not instance.admin_up:
8244       self.proc.LogWarning("Not shutting down the disk even if the instance is"
8245                            " not supposed to be running because no wait for"
8246                            " sync mode was requested.")
8247
8248
8249 class LUQueryInstanceData(NoHooksLU):
8250   """Query runtime instance data.
8251
8252   """
8253   _OP_REQP = [
8254     ("instances", _TListOf(_TNonEmptyString)),
8255     ("static", _TBool),
8256     ]
8257   REQ_BGL = False
8258
8259   def ExpandNames(self):
8260     self.needed_locks = {}
8261     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8262
8263     if self.op.instances:
8264       self.wanted_names = []
8265       for name in self.op.instances:
8266         full_name = _ExpandInstanceName(self.cfg, name)
8267         self.wanted_names.append(full_name)
8268       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8269     else:
8270       self.wanted_names = None
8271       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8272
8273     self.needed_locks[locking.LEVEL_NODE] = []
8274     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8275
8276   def DeclareLocks(self, level):
8277     if level == locking.LEVEL_NODE:
8278       self._LockInstancesNodes()
8279
8280   def CheckPrereq(self):
8281     """Check prerequisites.
8282
8283     This only checks the optional instance list against the existing names.
8284
8285     """
8286     if self.wanted_names is None:
8287       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8288
8289     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8290                              in self.wanted_names]
8291
8292   def _ComputeBlockdevStatus(self, node, instance_name, dev):
8293     """Returns the status of a block device
8294
8295     """
8296     if self.op.static or not node:
8297       return None
8298
8299     self.cfg.SetDiskID(dev, node)
8300
8301     result = self.rpc.call_blockdev_find(node, dev)
8302     if result.offline:
8303       return None
8304
8305     result.Raise("Can't compute disk status for %s" % instance_name)
8306
8307     status = result.payload
8308     if status is None:
8309       return None
8310
8311     return (status.dev_path, status.major, status.minor,
8312             status.sync_percent, status.estimated_time,
8313             status.is_degraded, status.ldisk_status)
8314
8315   def _ComputeDiskStatus(self, instance, snode, dev):
8316     """Compute block device status.
8317
8318     """
8319     if dev.dev_type in constants.LDS_DRBD:
8320       # we change the snode then (otherwise we use the one passed in)
8321       if dev.logical_id[0] == instance.primary_node:
8322         snode = dev.logical_id[1]
8323       else:
8324         snode = dev.logical_id[0]
8325
8326     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8327                                               instance.name, dev)
8328     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8329
8330     if dev.children:
8331       dev_children = [self._ComputeDiskStatus(instance, snode, child)
8332                       for child in dev.children]
8333     else:
8334       dev_children = []
8335
8336     data = {
8337       "iv_name": dev.iv_name,
8338       "dev_type": dev.dev_type,
8339       "logical_id": dev.logical_id,
8340       "physical_id": dev.physical_id,
8341       "pstatus": dev_pstatus,
8342       "sstatus": dev_sstatus,
8343       "children": dev_children,
8344       "mode": dev.mode,
8345       "size": dev.size,
8346       }
8347
8348     return data
8349
8350   def Exec(self, feedback_fn):
8351     """Gather and return data"""
8352     result = {}
8353
8354     cluster = self.cfg.GetClusterInfo()
8355
8356     for instance in self.wanted_instances:
8357       if not self.op.static:
8358         remote_info = self.rpc.call_instance_info(instance.primary_node,
8359                                                   instance.name,
8360                                                   instance.hypervisor)
8361         remote_info.Raise("Error checking node %s" % instance.primary_node)
8362         remote_info = remote_info.payload
8363         if remote_info and "state" in remote_info:
8364           remote_state = "up"
8365         else:
8366           remote_state = "down"
8367       else:
8368         remote_state = None
8369       if instance.admin_up:
8370         config_state = "up"
8371       else:
8372         config_state = "down"
8373
8374       disks = [self._ComputeDiskStatus(instance, None, device)
8375                for device in instance.disks]
8376
8377       idict = {
8378         "name": instance.name,
8379         "config_state": config_state,
8380         "run_state": remote_state,
8381         "pnode": instance.primary_node,
8382         "snodes": instance.secondary_nodes,
8383         "os": instance.os,
8384         # this happens to be the same format used for hooks
8385         "nics": _NICListToTuple(self, instance.nics),
8386         "disk_template": instance.disk_template,
8387         "disks": disks,
8388         "hypervisor": instance.hypervisor,
8389         "network_port": instance.network_port,
8390         "hv_instance": instance.hvparams,
8391         "hv_actual": cluster.FillHV(instance, skip_globals=True),
8392         "be_instance": instance.beparams,
8393         "be_actual": cluster.FillBE(instance),
8394         "os_instance": instance.osparams,
8395         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8396         "serial_no": instance.serial_no,
8397         "mtime": instance.mtime,
8398         "ctime": instance.ctime,
8399         "uuid": instance.uuid,
8400         }
8401
8402       result[instance.name] = idict
8403
8404     return result
8405
8406
8407 class LUSetInstanceParams(LogicalUnit):
8408   """Modifies an instances's parameters.
8409
8410   """
8411   HPATH = "instance-modify"
8412   HTYPE = constants.HTYPE_INSTANCE
8413   _OP_REQP = [("instance_name", _TNonEmptyString)]
8414   _OP_DEFS = [
8415     ("nics", _EmptyList),
8416     ("disks", _EmptyList),
8417     ("beparams", _EmptyDict),
8418     ("hvparams", _EmptyDict),
8419     ("disk_template", None),
8420     ("remote_node", None),
8421     ("os_name", None),
8422     ("force_variant", False),
8423     ("osparams", None),
8424     ("force", False),
8425     ]
8426   REQ_BGL = False
8427
8428   def CheckArguments(self):
8429     if not (self.op.nics or self.op.disks or self.op.disk_template or
8430             self.op.hvparams or self.op.beparams or self.op.os_name):
8431       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8432
8433     if self.op.hvparams:
8434       _CheckGlobalHvParams(self.op.hvparams)
8435
8436     # Disk validation
8437     disk_addremove = 0
8438     for disk_op, disk_dict in self.op.disks:
8439       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8440       if disk_op == constants.DDM_REMOVE:
8441         disk_addremove += 1
8442         continue
8443       elif disk_op == constants.DDM_ADD:
8444         disk_addremove += 1
8445       else:
8446         if not isinstance(disk_op, int):
8447           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8448         if not isinstance(disk_dict, dict):
8449           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8450           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8451
8452       if disk_op == constants.DDM_ADD:
8453         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8454         if mode not in constants.DISK_ACCESS_SET:
8455           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8456                                      errors.ECODE_INVAL)
8457         size = disk_dict.get('size', None)
8458         if size is None:
8459           raise errors.OpPrereqError("Required disk parameter size missing",
8460                                      errors.ECODE_INVAL)
8461         try:
8462           size = int(size)
8463         except (TypeError, ValueError), err:
8464           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8465                                      str(err), errors.ECODE_INVAL)
8466         disk_dict['size'] = size
8467       else:
8468         # modification of disk
8469         if 'size' in disk_dict:
8470           raise errors.OpPrereqError("Disk size change not possible, use"
8471                                      " grow-disk", errors.ECODE_INVAL)
8472
8473     if disk_addremove > 1:
8474       raise errors.OpPrereqError("Only one disk add or remove operation"
8475                                  " supported at a time", errors.ECODE_INVAL)
8476
8477     if self.op.disks and self.op.disk_template is not None:
8478       raise errors.OpPrereqError("Disk template conversion and other disk"
8479                                  " changes not supported at the same time",
8480                                  errors.ECODE_INVAL)
8481
8482     if self.op.disk_template:
8483       _CheckDiskTemplate(self.op.disk_template)
8484       if (self.op.disk_template in constants.DTS_NET_MIRROR and
8485           self.op.remote_node is None):
8486         raise errors.OpPrereqError("Changing the disk template to a mirrored"
8487                                    " one requires specifying a secondary node",
8488                                    errors.ECODE_INVAL)
8489
8490     # NIC validation
8491     nic_addremove = 0
8492     for nic_op, nic_dict in self.op.nics:
8493       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8494       if nic_op == constants.DDM_REMOVE:
8495         nic_addremove += 1
8496         continue
8497       elif nic_op == constants.DDM_ADD:
8498         nic_addremove += 1
8499       else:
8500         if not isinstance(nic_op, int):
8501           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8502         if not isinstance(nic_dict, dict):
8503           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8504           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8505
8506       # nic_dict should be a dict
8507       nic_ip = nic_dict.get('ip', None)
8508       if nic_ip is not None:
8509         if nic_ip.lower() == constants.VALUE_NONE:
8510           nic_dict['ip'] = None
8511         else:
8512           if not utils.IsValidIP(nic_ip):
8513             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8514                                        errors.ECODE_INVAL)
8515
8516       nic_bridge = nic_dict.get('bridge', None)
8517       nic_link = nic_dict.get('link', None)
8518       if nic_bridge and nic_link:
8519         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8520                                    " at the same time", errors.ECODE_INVAL)
8521       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8522         nic_dict['bridge'] = None
8523       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8524         nic_dict['link'] = None
8525
8526       if nic_op == constants.DDM_ADD:
8527         nic_mac = nic_dict.get('mac', None)
8528         if nic_mac is None:
8529           nic_dict['mac'] = constants.VALUE_AUTO
8530
8531       if 'mac' in nic_dict:
8532         nic_mac = nic_dict['mac']
8533         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8534           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8535
8536         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8537           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8538                                      " modifying an existing nic",
8539                                      errors.ECODE_INVAL)
8540
8541     if nic_addremove > 1:
8542       raise errors.OpPrereqError("Only one NIC add or remove operation"
8543                                  " supported at a time", errors.ECODE_INVAL)
8544
8545   def ExpandNames(self):
8546     self._ExpandAndLockInstance()
8547     self.needed_locks[locking.LEVEL_NODE] = []
8548     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8549
8550   def DeclareLocks(self, level):
8551     if level == locking.LEVEL_NODE:
8552       self._LockInstancesNodes()
8553       if self.op.disk_template and self.op.remote_node:
8554         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8555         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8556
8557   def BuildHooksEnv(self):
8558     """Build hooks env.
8559
8560     This runs on the master, primary and secondaries.
8561
8562     """
8563     args = dict()
8564     if constants.BE_MEMORY in self.be_new:
8565       args['memory'] = self.be_new[constants.BE_MEMORY]
8566     if constants.BE_VCPUS in self.be_new:
8567       args['vcpus'] = self.be_new[constants.BE_VCPUS]
8568     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8569     # information at all.
8570     if self.op.nics:
8571       args['nics'] = []
8572       nic_override = dict(self.op.nics)
8573       for idx, nic in enumerate(self.instance.nics):
8574         if idx in nic_override:
8575           this_nic_override = nic_override[idx]
8576         else:
8577           this_nic_override = {}
8578         if 'ip' in this_nic_override:
8579           ip = this_nic_override['ip']
8580         else:
8581           ip = nic.ip
8582         if 'mac' in this_nic_override:
8583           mac = this_nic_override['mac']
8584         else:
8585           mac = nic.mac
8586         if idx in self.nic_pnew:
8587           nicparams = self.nic_pnew[idx]
8588         else:
8589           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8590         mode = nicparams[constants.NIC_MODE]
8591         link = nicparams[constants.NIC_LINK]
8592         args['nics'].append((ip, mac, mode, link))
8593       if constants.DDM_ADD in nic_override:
8594         ip = nic_override[constants.DDM_ADD].get('ip', None)
8595         mac = nic_override[constants.DDM_ADD]['mac']
8596         nicparams = self.nic_pnew[constants.DDM_ADD]
8597         mode = nicparams[constants.NIC_MODE]
8598         link = nicparams[constants.NIC_LINK]
8599         args['nics'].append((ip, mac, mode, link))
8600       elif constants.DDM_REMOVE in nic_override:
8601         del args['nics'][-1]
8602
8603     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8604     if self.op.disk_template:
8605       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8606     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8607     return env, nl, nl
8608
8609   def CheckPrereq(self):
8610     """Check prerequisites.
8611
8612     This only checks the instance list against the existing names.
8613
8614     """
8615     # checking the new params on the primary/secondary nodes
8616
8617     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8618     cluster = self.cluster = self.cfg.GetClusterInfo()
8619     assert self.instance is not None, \
8620       "Cannot retrieve locked instance %s" % self.op.instance_name
8621     pnode = instance.primary_node
8622     nodelist = list(instance.all_nodes)
8623
8624     # OS change
8625     if self.op.os_name and not self.op.force:
8626       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8627                       self.op.force_variant)
8628       instance_os = self.op.os_name
8629     else:
8630       instance_os = instance.os
8631
8632     if self.op.disk_template:
8633       if instance.disk_template == self.op.disk_template:
8634         raise errors.OpPrereqError("Instance already has disk template %s" %
8635                                    instance.disk_template, errors.ECODE_INVAL)
8636
8637       if (instance.disk_template,
8638           self.op.disk_template) not in self._DISK_CONVERSIONS:
8639         raise errors.OpPrereqError("Unsupported disk template conversion from"
8640                                    " %s to %s" % (instance.disk_template,
8641                                                   self.op.disk_template),
8642                                    errors.ECODE_INVAL)
8643       if self.op.disk_template in constants.DTS_NET_MIRROR:
8644         _CheckNodeOnline(self, self.op.remote_node)
8645         _CheckNodeNotDrained(self, self.op.remote_node)
8646         disks = [{"size": d.size} for d in instance.disks]
8647         required = _ComputeDiskSize(self.op.disk_template, disks)
8648         _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8649         _CheckInstanceDown(self, instance, "cannot change disk template")
8650
8651     # hvparams processing
8652     if self.op.hvparams:
8653       hv_type = instance.hypervisor
8654       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8655       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8656       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8657
8658       # local check
8659       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8660       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8661       self.hv_new = hv_new # the new actual values
8662       self.hv_inst = i_hvdict # the new dict (without defaults)
8663     else:
8664       self.hv_new = self.hv_inst = {}
8665
8666     # beparams processing
8667     if self.op.beparams:
8668       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8669                                    use_none=True)
8670       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8671       be_new = cluster.SimpleFillBE(i_bedict)
8672       self.be_new = be_new # the new actual values
8673       self.be_inst = i_bedict # the new dict (without defaults)
8674     else:
8675       self.be_new = self.be_inst = {}
8676
8677     # osparams processing
8678     if self.op.osparams:
8679       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8680       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8681       self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8682       self.os_inst = i_osdict # the new dict (without defaults)
8683     else:
8684       self.os_new = self.os_inst = {}
8685
8686     self.warn = []
8687
8688     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8689       mem_check_list = [pnode]
8690       if be_new[constants.BE_AUTO_BALANCE]:
8691         # either we changed auto_balance to yes or it was from before
8692         mem_check_list.extend(instance.secondary_nodes)
8693       instance_info = self.rpc.call_instance_info(pnode, instance.name,
8694                                                   instance.hypervisor)
8695       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8696                                          instance.hypervisor)
8697       pninfo = nodeinfo[pnode]
8698       msg = pninfo.fail_msg
8699       if msg:
8700         # Assume the primary node is unreachable and go ahead
8701         self.warn.append("Can't get info from primary node %s: %s" %
8702                          (pnode,  msg))
8703       elif not isinstance(pninfo.payload.get('memory_free', None), int):
8704         self.warn.append("Node data from primary node %s doesn't contain"
8705                          " free memory information" % pnode)
8706       elif instance_info.fail_msg:
8707         self.warn.append("Can't get instance runtime information: %s" %
8708                         instance_info.fail_msg)
8709       else:
8710         if instance_info.payload:
8711           current_mem = int(instance_info.payload['memory'])
8712         else:
8713           # Assume instance not running
8714           # (there is a slight race condition here, but it's not very probable,
8715           # and we have no other way to check)
8716           current_mem = 0
8717         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8718                     pninfo.payload['memory_free'])
8719         if miss_mem > 0:
8720           raise errors.OpPrereqError("This change will prevent the instance"
8721                                      " from starting, due to %d MB of memory"
8722                                      " missing on its primary node" % miss_mem,
8723                                      errors.ECODE_NORES)
8724
8725       if be_new[constants.BE_AUTO_BALANCE]:
8726         for node, nres in nodeinfo.items():
8727           if node not in instance.secondary_nodes:
8728             continue
8729           msg = nres.fail_msg
8730           if msg:
8731             self.warn.append("Can't get info from secondary node %s: %s" %
8732                              (node, msg))
8733           elif not isinstance(nres.payload.get('memory_free', None), int):
8734             self.warn.append("Secondary node %s didn't return free"
8735                              " memory information" % node)
8736           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8737             self.warn.append("Not enough memory to failover instance to"
8738                              " secondary node %s" % node)
8739
8740     # NIC processing
8741     self.nic_pnew = {}
8742     self.nic_pinst = {}
8743     for nic_op, nic_dict in self.op.nics:
8744       if nic_op == constants.DDM_REMOVE:
8745         if not instance.nics:
8746           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8747                                      errors.ECODE_INVAL)
8748         continue
8749       if nic_op != constants.DDM_ADD:
8750         # an existing nic
8751         if not instance.nics:
8752           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8753                                      " no NICs" % nic_op,
8754                                      errors.ECODE_INVAL)
8755         if nic_op < 0 or nic_op >= len(instance.nics):
8756           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8757                                      " are 0 to %d" %
8758                                      (nic_op, len(instance.nics) - 1),
8759                                      errors.ECODE_INVAL)
8760         old_nic_params = instance.nics[nic_op].nicparams
8761         old_nic_ip = instance.nics[nic_op].ip
8762       else:
8763         old_nic_params = {}
8764         old_nic_ip = None
8765
8766       update_params_dict = dict([(key, nic_dict[key])
8767                                  for key in constants.NICS_PARAMETERS
8768                                  if key in nic_dict])
8769
8770       if 'bridge' in nic_dict:
8771         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8772
8773       new_nic_params = _GetUpdatedParams(old_nic_params,
8774                                          update_params_dict)
8775       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8776       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8777       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8778       self.nic_pinst[nic_op] = new_nic_params
8779       self.nic_pnew[nic_op] = new_filled_nic_params
8780       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8781
8782       if new_nic_mode == constants.NIC_MODE_BRIDGED:
8783         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8784         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8785         if msg:
8786           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8787           if self.op.force:
8788             self.warn.append(msg)
8789           else:
8790             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8791       if new_nic_mode == constants.NIC_MODE_ROUTED:
8792         if 'ip' in nic_dict:
8793           nic_ip = nic_dict['ip']
8794         else:
8795           nic_ip = old_nic_ip
8796         if nic_ip is None:
8797           raise errors.OpPrereqError('Cannot set the nic ip to None'
8798                                      ' on a routed nic', errors.ECODE_INVAL)
8799       if 'mac' in nic_dict:
8800         nic_mac = nic_dict['mac']
8801         if nic_mac is None:
8802           raise errors.OpPrereqError('Cannot set the nic mac to None',
8803                                      errors.ECODE_INVAL)
8804         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8805           # otherwise generate the mac
8806           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8807         else:
8808           # or validate/reserve the current one
8809           try:
8810             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8811           except errors.ReservationError:
8812             raise errors.OpPrereqError("MAC address %s already in use"
8813                                        " in cluster" % nic_mac,
8814                                        errors.ECODE_NOTUNIQUE)
8815
8816     # DISK processing
8817     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8818       raise errors.OpPrereqError("Disk operations not supported for"
8819                                  " diskless instances",
8820                                  errors.ECODE_INVAL)
8821     for disk_op, _ in self.op.disks:
8822       if disk_op == constants.DDM_REMOVE:
8823         if len(instance.disks) == 1:
8824           raise errors.OpPrereqError("Cannot remove the last disk of"
8825                                      " an instance", errors.ECODE_INVAL)
8826         _CheckInstanceDown(self, instance, "cannot remove disks")
8827
8828       if (disk_op == constants.DDM_ADD and
8829           len(instance.nics) >= constants.MAX_DISKS):
8830         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8831                                    " add more" % constants.MAX_DISKS,
8832                                    errors.ECODE_STATE)
8833       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8834         # an existing disk
8835         if disk_op < 0 or disk_op >= len(instance.disks):
8836           raise errors.OpPrereqError("Invalid disk index %s, valid values"
8837                                      " are 0 to %d" %
8838                                      (disk_op, len(instance.disks)),
8839                                      errors.ECODE_INVAL)
8840
8841     return
8842
8843   def _ConvertPlainToDrbd(self, feedback_fn):
8844     """Converts an instance from plain to drbd.
8845
8846     """
8847     feedback_fn("Converting template to drbd")
8848     instance = self.instance
8849     pnode = instance.primary_node
8850     snode = self.op.remote_node
8851
8852     # create a fake disk info for _GenerateDiskTemplate
8853     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8854     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8855                                       instance.name, pnode, [snode],
8856                                       disk_info, None, None, 0)
8857     info = _GetInstanceInfoText(instance)
8858     feedback_fn("Creating aditional volumes...")
8859     # first, create the missing data and meta devices
8860     for disk in new_disks:
8861       # unfortunately this is... not too nice
8862       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8863                             info, True)
8864       for child in disk.children:
8865         _CreateSingleBlockDev(self, snode, instance, child, info, True)
8866     # at this stage, all new LVs have been created, we can rename the
8867     # old ones
8868     feedback_fn("Renaming original volumes...")
8869     rename_list = [(o, n.children[0].logical_id)
8870                    for (o, n) in zip(instance.disks, new_disks)]
8871     result = self.rpc.call_blockdev_rename(pnode, rename_list)
8872     result.Raise("Failed to rename original LVs")
8873
8874     feedback_fn("Initializing DRBD devices...")
8875     # all child devices are in place, we can now create the DRBD devices
8876     for disk in new_disks:
8877       for node in [pnode, snode]:
8878         f_create = node == pnode
8879         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8880
8881     # at this point, the instance has been modified
8882     instance.disk_template = constants.DT_DRBD8
8883     instance.disks = new_disks
8884     self.cfg.Update(instance, feedback_fn)
8885
8886     # disks are created, waiting for sync
8887     disk_abort = not _WaitForSync(self, instance)
8888     if disk_abort:
8889       raise errors.OpExecError("There are some degraded disks for"
8890                                " this instance, please cleanup manually")
8891
8892   def _ConvertDrbdToPlain(self, feedback_fn):
8893     """Converts an instance from drbd to plain.
8894
8895     """
8896     instance = self.instance
8897     assert len(instance.secondary_nodes) == 1
8898     pnode = instance.primary_node
8899     snode = instance.secondary_nodes[0]
8900     feedback_fn("Converting template to plain")
8901
8902     old_disks = instance.disks
8903     new_disks = [d.children[0] for d in old_disks]
8904
8905     # copy over size and mode
8906     for parent, child in zip(old_disks, new_disks):
8907       child.size = parent.size
8908       child.mode = parent.mode
8909
8910     # update instance structure
8911     instance.disks = new_disks
8912     instance.disk_template = constants.DT_PLAIN
8913     self.cfg.Update(instance, feedback_fn)
8914
8915     feedback_fn("Removing volumes on the secondary node...")
8916     for disk in old_disks:
8917       self.cfg.SetDiskID(disk, snode)
8918       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8919       if msg:
8920         self.LogWarning("Could not remove block device %s on node %s,"
8921                         " continuing anyway: %s", disk.iv_name, snode, msg)
8922
8923     feedback_fn("Removing unneeded volumes on the primary node...")
8924     for idx, disk in enumerate(old_disks):
8925       meta = disk.children[1]
8926       self.cfg.SetDiskID(meta, pnode)
8927       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8928       if msg:
8929         self.LogWarning("Could not remove metadata for disk %d on node %s,"
8930                         " continuing anyway: %s", idx, pnode, msg)
8931
8932
8933   def Exec(self, feedback_fn):
8934     """Modifies an instance.
8935
8936     All parameters take effect only at the next restart of the instance.
8937
8938     """
8939     # Process here the warnings from CheckPrereq, as we don't have a
8940     # feedback_fn there.
8941     for warn in self.warn:
8942       feedback_fn("WARNING: %s" % warn)
8943
8944     result = []
8945     instance = self.instance
8946     # disk changes
8947     for disk_op, disk_dict in self.op.disks:
8948       if disk_op == constants.DDM_REMOVE:
8949         # remove the last disk
8950         device = instance.disks.pop()
8951         device_idx = len(instance.disks)
8952         for node, disk in device.ComputeNodeTree(instance.primary_node):
8953           self.cfg.SetDiskID(disk, node)
8954           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8955           if msg:
8956             self.LogWarning("Could not remove disk/%d on node %s: %s,"
8957                             " continuing anyway", device_idx, node, msg)
8958         result.append(("disk/%d" % device_idx, "remove"))
8959       elif disk_op == constants.DDM_ADD:
8960         # add a new disk
8961         if instance.disk_template == constants.DT_FILE:
8962           file_driver, file_path = instance.disks[0].logical_id
8963           file_path = os.path.dirname(file_path)
8964         else:
8965           file_driver = file_path = None
8966         disk_idx_base = len(instance.disks)
8967         new_disk = _GenerateDiskTemplate(self,
8968                                          instance.disk_template,
8969                                          instance.name, instance.primary_node,
8970                                          instance.secondary_nodes,
8971                                          [disk_dict],
8972                                          file_path,
8973                                          file_driver,
8974                                          disk_idx_base)[0]
8975         instance.disks.append(new_disk)
8976         info = _GetInstanceInfoText(instance)
8977
8978         logging.info("Creating volume %s for instance %s",
8979                      new_disk.iv_name, instance.name)
8980         # Note: this needs to be kept in sync with _CreateDisks
8981         #HARDCODE
8982         for node in instance.all_nodes:
8983           f_create = node == instance.primary_node
8984           try:
8985             _CreateBlockDev(self, node, instance, new_disk,
8986                             f_create, info, f_create)
8987           except errors.OpExecError, err:
8988             self.LogWarning("Failed to create volume %s (%s) on"
8989                             " node %s: %s",
8990                             new_disk.iv_name, new_disk, node, err)
8991         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8992                        (new_disk.size, new_disk.mode)))
8993       else:
8994         # change a given disk
8995         instance.disks[disk_op].mode = disk_dict['mode']
8996         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8997
8998     if self.op.disk_template:
8999       r_shut = _ShutdownInstanceDisks(self, instance)
9000       if not r_shut:
9001         raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9002                                  " proceed with disk template conversion")
9003       mode = (instance.disk_template, self.op.disk_template)
9004       try:
9005         self._DISK_CONVERSIONS[mode](self, feedback_fn)
9006       except:
9007         self.cfg.ReleaseDRBDMinors(instance.name)
9008         raise
9009       result.append(("disk_template", self.op.disk_template))
9010
9011     # NIC changes
9012     for nic_op, nic_dict in self.op.nics:
9013       if nic_op == constants.DDM_REMOVE:
9014         # remove the last nic
9015         del instance.nics[-1]
9016         result.append(("nic.%d" % len(instance.nics), "remove"))
9017       elif nic_op == constants.DDM_ADD:
9018         # mac and bridge should be set, by now
9019         mac = nic_dict['mac']
9020         ip = nic_dict.get('ip', None)
9021         nicparams = self.nic_pinst[constants.DDM_ADD]
9022         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9023         instance.nics.append(new_nic)
9024         result.append(("nic.%d" % (len(instance.nics) - 1),
9025                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
9026                        (new_nic.mac, new_nic.ip,
9027                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9028                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9029                        )))
9030       else:
9031         for key in 'mac', 'ip':
9032           if key in nic_dict:
9033             setattr(instance.nics[nic_op], key, nic_dict[key])
9034         if nic_op in self.nic_pinst:
9035           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9036         for key, val in nic_dict.iteritems():
9037           result.append(("nic.%s/%d" % (key, nic_op), val))
9038
9039     # hvparams changes
9040     if self.op.hvparams:
9041       instance.hvparams = self.hv_inst
9042       for key, val in self.op.hvparams.iteritems():
9043         result.append(("hv/%s" % key, val))
9044
9045     # beparams changes
9046     if self.op.beparams:
9047       instance.beparams = self.be_inst
9048       for key, val in self.op.beparams.iteritems():
9049         result.append(("be/%s" % key, val))
9050
9051     # OS change
9052     if self.op.os_name:
9053       instance.os = self.op.os_name
9054
9055     # osparams changes
9056     if self.op.osparams:
9057       instance.osparams = self.os_inst
9058       for key, val in self.op.osparams.iteritems():
9059         result.append(("os/%s" % key, val))
9060
9061     self.cfg.Update(instance, feedback_fn)
9062
9063     return result
9064
9065   _DISK_CONVERSIONS = {
9066     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9067     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9068     }
9069
9070
9071 class LUQueryExports(NoHooksLU):
9072   """Query the exports list
9073
9074   """
9075   _OP_REQP = [("nodes", _TListOf(_TNonEmptyString))]
9076   REQ_BGL = False
9077
9078   def ExpandNames(self):
9079     self.needed_locks = {}
9080     self.share_locks[locking.LEVEL_NODE] = 1
9081     if not self.op.nodes:
9082       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9083     else:
9084       self.needed_locks[locking.LEVEL_NODE] = \
9085         _GetWantedNodes(self, self.op.nodes)
9086
9087   def Exec(self, feedback_fn):
9088     """Compute the list of all the exported system images.
9089
9090     @rtype: dict
9091     @return: a dictionary with the structure node->(export-list)
9092         where export-list is a list of the instances exported on
9093         that node.
9094
9095     """
9096     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9097     rpcresult = self.rpc.call_export_list(self.nodes)
9098     result = {}
9099     for node in rpcresult:
9100       if rpcresult[node].fail_msg:
9101         result[node] = False
9102       else:
9103         result[node] = rpcresult[node].payload
9104
9105     return result
9106
9107
9108 class LUPrepareExport(NoHooksLU):
9109   """Prepares an instance for an export and returns useful information.
9110
9111   """
9112   _OP_REQP = [
9113     ("instance_name", _TNonEmptyString),
9114     ("mode", _TElemOf(constants.EXPORT_MODES)),
9115     ]
9116   REQ_BGL = False
9117
9118   def ExpandNames(self):
9119     self._ExpandAndLockInstance()
9120
9121   def CheckPrereq(self):
9122     """Check prerequisites.
9123
9124     """
9125     instance_name = self.op.instance_name
9126
9127     self.instance = self.cfg.GetInstanceInfo(instance_name)
9128     assert self.instance is not None, \
9129           "Cannot retrieve locked instance %s" % self.op.instance_name
9130     _CheckNodeOnline(self, self.instance.primary_node)
9131
9132     self._cds = _GetClusterDomainSecret()
9133
9134   def Exec(self, feedback_fn):
9135     """Prepares an instance for an export.
9136
9137     """
9138     instance = self.instance
9139
9140     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9141       salt = utils.GenerateSecret(8)
9142
9143       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9144       result = self.rpc.call_x509_cert_create(instance.primary_node,
9145                                               constants.RIE_CERT_VALIDITY)
9146       result.Raise("Can't create X509 key and certificate on %s" % result.node)
9147
9148       (name, cert_pem) = result.payload
9149
9150       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9151                                              cert_pem)
9152
9153       return {
9154         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9155         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9156                           salt),
9157         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9158         }
9159
9160     return None
9161
9162
9163 class LUExportInstance(LogicalUnit):
9164   """Export an instance to an image in the cluster.
9165
9166   """
9167   HPATH = "instance-export"
9168   HTYPE = constants.HTYPE_INSTANCE
9169   _OP_REQP = [
9170     ("instance_name", _TNonEmptyString),
9171     ("target_node", _TNonEmptyString),
9172     ("shutdown", _TBool),
9173     ("mode", _TElemOf(constants.EXPORT_MODES)),
9174     ]
9175   _OP_DEFS = [
9176     ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT),
9177     ("remove_instance", False),
9178     ("ignore_remove_failures", False),
9179     ("mode", constants.EXPORT_MODE_LOCAL),
9180     ("x509_key_name", None),
9181     ("destination_x509_ca", None),
9182     ]
9183   REQ_BGL = False
9184
9185   def CheckArguments(self):
9186     """Check the arguments.
9187
9188     """
9189     self.x509_key_name = self.op.x509_key_name
9190     self.dest_x509_ca_pem = self.op.destination_x509_ca
9191
9192     if self.op.remove_instance and not self.op.shutdown:
9193       raise errors.OpPrereqError("Can not remove instance without shutting it"
9194                                  " down before")
9195
9196     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9197       if not self.x509_key_name:
9198         raise errors.OpPrereqError("Missing X509 key name for encryption",
9199                                    errors.ECODE_INVAL)
9200
9201       if not self.dest_x509_ca_pem:
9202         raise errors.OpPrereqError("Missing destination X509 CA",
9203                                    errors.ECODE_INVAL)
9204
9205   def ExpandNames(self):
9206     self._ExpandAndLockInstance()
9207
9208     # Lock all nodes for local exports
9209     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9210       # FIXME: lock only instance primary and destination node
9211       #
9212       # Sad but true, for now we have do lock all nodes, as we don't know where
9213       # the previous export might be, and in this LU we search for it and
9214       # remove it from its current node. In the future we could fix this by:
9215       #  - making a tasklet to search (share-lock all), then create the
9216       #    new one, then one to remove, after
9217       #  - removing the removal operation altogether
9218       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9219
9220   def DeclareLocks(self, level):
9221     """Last minute lock declaration."""
9222     # All nodes are locked anyway, so nothing to do here.
9223
9224   def BuildHooksEnv(self):
9225     """Build hooks env.
9226
9227     This will run on the master, primary node and target node.
9228
9229     """
9230     env = {
9231       "EXPORT_MODE": self.op.mode,
9232       "EXPORT_NODE": self.op.target_node,
9233       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9234       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9235       # TODO: Generic function for boolean env variables
9236       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9237       }
9238
9239     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9240
9241     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9242
9243     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9244       nl.append(self.op.target_node)
9245
9246     return env, nl, nl
9247
9248   def CheckPrereq(self):
9249     """Check prerequisites.
9250
9251     This checks that the instance and node names are valid.
9252
9253     """
9254     instance_name = self.op.instance_name
9255
9256     self.instance = self.cfg.GetInstanceInfo(instance_name)
9257     assert self.instance is not None, \
9258           "Cannot retrieve locked instance %s" % self.op.instance_name
9259     _CheckNodeOnline(self, self.instance.primary_node)
9260
9261     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9262       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9263       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9264       assert self.dst_node is not None
9265
9266       _CheckNodeOnline(self, self.dst_node.name)
9267       _CheckNodeNotDrained(self, self.dst_node.name)
9268
9269       self._cds = None
9270       self.dest_disk_info = None
9271       self.dest_x509_ca = None
9272
9273     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9274       self.dst_node = None
9275
9276       if len(self.op.target_node) != len(self.instance.disks):
9277         raise errors.OpPrereqError(("Received destination information for %s"
9278                                     " disks, but instance %s has %s disks") %
9279                                    (len(self.op.target_node), instance_name,
9280                                     len(self.instance.disks)),
9281                                    errors.ECODE_INVAL)
9282
9283       cds = _GetClusterDomainSecret()
9284
9285       # Check X509 key name
9286       try:
9287         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9288       except (TypeError, ValueError), err:
9289         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9290
9291       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9292         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9293                                    errors.ECODE_INVAL)
9294
9295       # Load and verify CA
9296       try:
9297         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9298       except OpenSSL.crypto.Error, err:
9299         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9300                                    (err, ), errors.ECODE_INVAL)
9301
9302       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9303       if errcode is not None:
9304         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9305                                    (msg, ), errors.ECODE_INVAL)
9306
9307       self.dest_x509_ca = cert
9308
9309       # Verify target information
9310       disk_info = []
9311       for idx, disk_data in enumerate(self.op.target_node):
9312         try:
9313           (host, port, magic) = \
9314             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9315         except errors.GenericError, err:
9316           raise errors.OpPrereqError("Target info for disk %s: %s" %
9317                                      (idx, err), errors.ECODE_INVAL)
9318
9319         disk_info.append((host, port, magic))
9320
9321       assert len(disk_info) == len(self.op.target_node)
9322       self.dest_disk_info = disk_info
9323
9324     else:
9325       raise errors.ProgrammerError("Unhandled export mode %r" %
9326                                    self.op.mode)
9327
9328     # instance disk type verification
9329     # TODO: Implement export support for file-based disks
9330     for disk in self.instance.disks:
9331       if disk.dev_type == constants.LD_FILE:
9332         raise errors.OpPrereqError("Export not supported for instances with"
9333                                    " file-based disks", errors.ECODE_INVAL)
9334
9335   def _CleanupExports(self, feedback_fn):
9336     """Removes exports of current instance from all other nodes.
9337
9338     If an instance in a cluster with nodes A..D was exported to node C, its
9339     exports will be removed from the nodes A, B and D.
9340
9341     """
9342     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9343
9344     nodelist = self.cfg.GetNodeList()
9345     nodelist.remove(self.dst_node.name)
9346
9347     # on one-node clusters nodelist will be empty after the removal
9348     # if we proceed the backup would be removed because OpQueryExports
9349     # substitutes an empty list with the full cluster node list.
9350     iname = self.instance.name
9351     if nodelist:
9352       feedback_fn("Removing old exports for instance %s" % iname)
9353       exportlist = self.rpc.call_export_list(nodelist)
9354       for node in exportlist:
9355         if exportlist[node].fail_msg:
9356           continue
9357         if iname in exportlist[node].payload:
9358           msg = self.rpc.call_export_remove(node, iname).fail_msg
9359           if msg:
9360             self.LogWarning("Could not remove older export for instance %s"
9361                             " on node %s: %s", iname, node, msg)
9362
9363   def Exec(self, feedback_fn):
9364     """Export an instance to an image in the cluster.
9365
9366     """
9367     assert self.op.mode in constants.EXPORT_MODES
9368
9369     instance = self.instance
9370     src_node = instance.primary_node
9371
9372     if self.op.shutdown:
9373       # shutdown the instance, but not the disks
9374       feedback_fn("Shutting down instance %s" % instance.name)
9375       result = self.rpc.call_instance_shutdown(src_node, instance,
9376                                                self.op.shutdown_timeout)
9377       # TODO: Maybe ignore failures if ignore_remove_failures is set
9378       result.Raise("Could not shutdown instance %s on"
9379                    " node %s" % (instance.name, src_node))
9380
9381     # set the disks ID correctly since call_instance_start needs the
9382     # correct drbd minor to create the symlinks
9383     for disk in instance.disks:
9384       self.cfg.SetDiskID(disk, src_node)
9385
9386     activate_disks = (not instance.admin_up)
9387
9388     if activate_disks:
9389       # Activate the instance disks if we'exporting a stopped instance
9390       feedback_fn("Activating disks for %s" % instance.name)
9391       _StartInstanceDisks(self, instance, None)
9392
9393     try:
9394       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9395                                                      instance)
9396
9397       helper.CreateSnapshots()
9398       try:
9399         if (self.op.shutdown and instance.admin_up and
9400             not self.op.remove_instance):
9401           assert not activate_disks
9402           feedback_fn("Starting instance %s" % instance.name)
9403           result = self.rpc.call_instance_start(src_node, instance, None, None)
9404           msg = result.fail_msg
9405           if msg:
9406             feedback_fn("Failed to start instance: %s" % msg)
9407             _ShutdownInstanceDisks(self, instance)
9408             raise errors.OpExecError("Could not start instance: %s" % msg)
9409
9410         if self.op.mode == constants.EXPORT_MODE_LOCAL:
9411           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9412         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9413           connect_timeout = constants.RIE_CONNECT_TIMEOUT
9414           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9415
9416           (key_name, _, _) = self.x509_key_name
9417
9418           dest_ca_pem = \
9419             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9420                                             self.dest_x509_ca)
9421
9422           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9423                                                      key_name, dest_ca_pem,
9424                                                      timeouts)
9425       finally:
9426         helper.Cleanup()
9427
9428       # Check for backwards compatibility
9429       assert len(dresults) == len(instance.disks)
9430       assert compat.all(isinstance(i, bool) for i in dresults), \
9431              "Not all results are boolean: %r" % dresults
9432
9433     finally:
9434       if activate_disks:
9435         feedback_fn("Deactivating disks for %s" % instance.name)
9436         _ShutdownInstanceDisks(self, instance)
9437
9438     # Remove instance if requested
9439     if self.op.remove_instance:
9440       if not (compat.all(dresults) and fin_resu):
9441         feedback_fn("Not removing instance %s as parts of the export failed" %
9442                     instance.name)
9443       else:
9444         feedback_fn("Removing instance %s" % instance.name)
9445         _RemoveInstance(self, feedback_fn, instance,
9446                         self.op.ignore_remove_failures)
9447
9448     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9449       self._CleanupExports(feedback_fn)
9450
9451     return fin_resu, dresults
9452
9453
9454 class LURemoveExport(NoHooksLU):
9455   """Remove exports related to the named instance.
9456
9457   """
9458   _OP_REQP = [("instance_name", _TNonEmptyString)]
9459   REQ_BGL = False
9460
9461   def ExpandNames(self):
9462     self.needed_locks = {}
9463     # We need all nodes to be locked in order for RemoveExport to work, but we
9464     # don't need to lock the instance itself, as nothing will happen to it (and
9465     # we can remove exports also for a removed instance)
9466     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9467
9468   def Exec(self, feedback_fn):
9469     """Remove any export.
9470
9471     """
9472     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9473     # If the instance was not found we'll try with the name that was passed in.
9474     # This will only work if it was an FQDN, though.
9475     fqdn_warn = False
9476     if not instance_name:
9477       fqdn_warn = True
9478       instance_name = self.op.instance_name
9479
9480     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9481     exportlist = self.rpc.call_export_list(locked_nodes)
9482     found = False
9483     for node in exportlist:
9484       msg = exportlist[node].fail_msg
9485       if msg:
9486         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9487         continue
9488       if instance_name in exportlist[node].payload:
9489         found = True
9490         result = self.rpc.call_export_remove(node, instance_name)
9491         msg = result.fail_msg
9492         if msg:
9493           logging.error("Could not remove export for instance %s"
9494                         " on node %s: %s", instance_name, node, msg)
9495
9496     if fqdn_warn and not found:
9497       feedback_fn("Export not found. If trying to remove an export belonging"
9498                   " to a deleted instance please use its Fully Qualified"
9499                   " Domain Name.")
9500
9501
9502 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9503   """Generic tags LU.
9504
9505   This is an abstract class which is the parent of all the other tags LUs.
9506
9507   """
9508
9509   def ExpandNames(self):
9510     self.needed_locks = {}
9511     if self.op.kind == constants.TAG_NODE:
9512       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9513       self.needed_locks[locking.LEVEL_NODE] = self.op.name
9514     elif self.op.kind == constants.TAG_INSTANCE:
9515       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9516       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9517
9518   def CheckPrereq(self):
9519     """Check prerequisites.
9520
9521     """
9522     if self.op.kind == constants.TAG_CLUSTER:
9523       self.target = self.cfg.GetClusterInfo()
9524     elif self.op.kind == constants.TAG_NODE:
9525       self.target = self.cfg.GetNodeInfo(self.op.name)
9526     elif self.op.kind == constants.TAG_INSTANCE:
9527       self.target = self.cfg.GetInstanceInfo(self.op.name)
9528     else:
9529       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9530                                  str(self.op.kind), errors.ECODE_INVAL)
9531
9532
9533 class LUGetTags(TagsLU):
9534   """Returns the tags of a given object.
9535
9536   """
9537   _OP_REQP = [
9538     ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9539     ("name", _TNonEmptyString),
9540     ]
9541   REQ_BGL = False
9542
9543   def Exec(self, feedback_fn):
9544     """Returns the tag list.
9545
9546     """
9547     return list(self.target.GetTags())
9548
9549
9550 class LUSearchTags(NoHooksLU):
9551   """Searches the tags for a given pattern.
9552
9553   """
9554   _OP_REQP = [("pattern", _TNonEmptyString)]
9555   REQ_BGL = False
9556
9557   def ExpandNames(self):
9558     self.needed_locks = {}
9559
9560   def CheckPrereq(self):
9561     """Check prerequisites.
9562
9563     This checks the pattern passed for validity by compiling it.
9564
9565     """
9566     try:
9567       self.re = re.compile(self.op.pattern)
9568     except re.error, err:
9569       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9570                                  (self.op.pattern, err), errors.ECODE_INVAL)
9571
9572   def Exec(self, feedback_fn):
9573     """Returns the tag list.
9574
9575     """
9576     cfg = self.cfg
9577     tgts = [("/cluster", cfg.GetClusterInfo())]
9578     ilist = cfg.GetAllInstancesInfo().values()
9579     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9580     nlist = cfg.GetAllNodesInfo().values()
9581     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9582     results = []
9583     for path, target in tgts:
9584       for tag in target.GetTags():
9585         if self.re.search(tag):
9586           results.append((path, tag))
9587     return results
9588
9589
9590 class LUAddTags(TagsLU):
9591   """Sets a tag on a given object.
9592
9593   """
9594   _OP_REQP = [
9595     ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9596     ("name", _TNonEmptyString),
9597     ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
9598     ]
9599   REQ_BGL = False
9600
9601   def CheckPrereq(self):
9602     """Check prerequisites.
9603
9604     This checks the type and length of the tag name and value.
9605
9606     """
9607     TagsLU.CheckPrereq(self)
9608     for tag in self.op.tags:
9609       objects.TaggableObject.ValidateTag(tag)
9610
9611   def Exec(self, feedback_fn):
9612     """Sets the tag.
9613
9614     """
9615     try:
9616       for tag in self.op.tags:
9617         self.target.AddTag(tag)
9618     except errors.TagError, err:
9619       raise errors.OpExecError("Error while setting tag: %s" % str(err))
9620     self.cfg.Update(self.target, feedback_fn)
9621
9622
9623 class LUDelTags(TagsLU):
9624   """Delete a list of tags from a given object.
9625
9626   """
9627   _OP_REQP = [
9628     ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9629     ("name", _TNonEmptyString),
9630     ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
9631     ]
9632   REQ_BGL = False
9633
9634   def CheckPrereq(self):
9635     """Check prerequisites.
9636
9637     This checks that we have the given tag.
9638
9639     """
9640     TagsLU.CheckPrereq(self)
9641     for tag in self.op.tags:
9642       objects.TaggableObject.ValidateTag(tag)
9643     del_tags = frozenset(self.op.tags)
9644     cur_tags = self.target.GetTags()
9645     if not del_tags <= cur_tags:
9646       diff_tags = del_tags - cur_tags
9647       diff_names = ["'%s'" % tag for tag in diff_tags]
9648       diff_names.sort()
9649       raise errors.OpPrereqError("Tag(s) %s not found" %
9650                                  (",".join(diff_names)), errors.ECODE_NOENT)
9651
9652   def Exec(self, feedback_fn):
9653     """Remove the tag from the object.
9654
9655     """
9656     for tag in self.op.tags:
9657       self.target.RemoveTag(tag)
9658     self.cfg.Update(self.target, feedback_fn)
9659
9660
9661 class LUTestDelay(NoHooksLU):
9662   """Sleep for a specified amount of time.
9663
9664   This LU sleeps on the master and/or nodes for a specified amount of
9665   time.
9666
9667   """
9668   _OP_REQP = [
9669     ("duration", _TFloat),
9670     ("on_master", _TBool),
9671     ("on_nodes", _TListOf(_TNonEmptyString)),
9672     ("repeat", _TPositiveInt)
9673     ]
9674   _OP_DEFS = [
9675     ("repeat", 0),
9676     ]
9677   REQ_BGL = False
9678
9679   def ExpandNames(self):
9680     """Expand names and set required locks.
9681
9682     This expands the node list, if any.
9683
9684     """
9685     self.needed_locks = {}
9686     if self.op.on_nodes:
9687       # _GetWantedNodes can be used here, but is not always appropriate to use
9688       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9689       # more information.
9690       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9691       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9692
9693   def _TestDelay(self):
9694     """Do the actual sleep.
9695
9696     """
9697     if self.op.on_master:
9698       if not utils.TestDelay(self.op.duration):
9699         raise errors.OpExecError("Error during master delay test")
9700     if self.op.on_nodes:
9701       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9702       for node, node_result in result.items():
9703         node_result.Raise("Failure during rpc call to node %s" % node)
9704
9705   def Exec(self, feedback_fn):
9706     """Execute the test delay opcode, with the wanted repetitions.
9707
9708     """
9709     if self.op.repeat == 0:
9710       self._TestDelay()
9711     else:
9712       top_value = self.op.repeat - 1
9713       for i in range(self.op.repeat):
9714         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9715         self._TestDelay()
9716
9717
9718 class IAllocator(object):
9719   """IAllocator framework.
9720
9721   An IAllocator instance has three sets of attributes:
9722     - cfg that is needed to query the cluster
9723     - input data (all members of the _KEYS class attribute are required)
9724     - four buffer attributes (in|out_data|text), that represent the
9725       input (to the external script) in text and data structure format,
9726       and the output from it, again in two formats
9727     - the result variables from the script (success, info, nodes) for
9728       easy usage
9729
9730   """
9731   # pylint: disable-msg=R0902
9732   # lots of instance attributes
9733   _ALLO_KEYS = [
9734     "name", "mem_size", "disks", "disk_template",
9735     "os", "tags", "nics", "vcpus", "hypervisor",
9736     ]
9737   _RELO_KEYS = [
9738     "name", "relocate_from",
9739     ]
9740   _EVAC_KEYS = [
9741     "evac_nodes",
9742     ]
9743
9744   def __init__(self, cfg, rpc, mode, **kwargs):
9745     self.cfg = cfg
9746     self.rpc = rpc
9747     # init buffer variables
9748     self.in_text = self.out_text = self.in_data = self.out_data = None
9749     # init all input fields so that pylint is happy
9750     self.mode = mode
9751     self.mem_size = self.disks = self.disk_template = None
9752     self.os = self.tags = self.nics = self.vcpus = None
9753     self.hypervisor = None
9754     self.relocate_from = None
9755     self.name = None
9756     self.evac_nodes = None
9757     # computed fields
9758     self.required_nodes = None
9759     # init result fields
9760     self.success = self.info = self.result = None
9761     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9762       keyset = self._ALLO_KEYS
9763       fn = self._AddNewInstance
9764     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9765       keyset = self._RELO_KEYS
9766       fn = self._AddRelocateInstance
9767     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9768       keyset = self._EVAC_KEYS
9769       fn = self._AddEvacuateNodes
9770     else:
9771       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9772                                    " IAllocator" % self.mode)
9773     for key in kwargs:
9774       if key not in keyset:
9775         raise errors.ProgrammerError("Invalid input parameter '%s' to"
9776                                      " IAllocator" % key)
9777       setattr(self, key, kwargs[key])
9778
9779     for key in keyset:
9780       if key not in kwargs:
9781         raise errors.ProgrammerError("Missing input parameter '%s' to"
9782                                      " IAllocator" % key)
9783     self._BuildInputData(fn)
9784
9785   def _ComputeClusterData(self):
9786     """Compute the generic allocator input data.
9787
9788     This is the data that is independent of the actual operation.
9789
9790     """
9791     cfg = self.cfg
9792     cluster_info = cfg.GetClusterInfo()
9793     # cluster data
9794     data = {
9795       "version": constants.IALLOCATOR_VERSION,
9796       "cluster_name": cfg.GetClusterName(),
9797       "cluster_tags": list(cluster_info.GetTags()),
9798       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9799       # we don't have job IDs
9800       }
9801     iinfo = cfg.GetAllInstancesInfo().values()
9802     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9803
9804     # node data
9805     node_results = {}
9806     node_list = cfg.GetNodeList()
9807
9808     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9809       hypervisor_name = self.hypervisor
9810     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9811       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9812     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9813       hypervisor_name = cluster_info.enabled_hypervisors[0]
9814
9815     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9816                                         hypervisor_name)
9817     node_iinfo = \
9818       self.rpc.call_all_instances_info(node_list,
9819                                        cluster_info.enabled_hypervisors)
9820     for nname, nresult in node_data.items():
9821       # first fill in static (config-based) values
9822       ninfo = cfg.GetNodeInfo(nname)
9823       pnr = {
9824         "tags": list(ninfo.GetTags()),
9825         "primary_ip": ninfo.primary_ip,
9826         "secondary_ip": ninfo.secondary_ip,
9827         "offline": ninfo.offline,
9828         "drained": ninfo.drained,
9829         "master_candidate": ninfo.master_candidate,
9830         }
9831
9832       if not (ninfo.offline or ninfo.drained):
9833         nresult.Raise("Can't get data for node %s" % nname)
9834         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9835                                 nname)
9836         remote_info = nresult.payload
9837
9838         for attr in ['memory_total', 'memory_free', 'memory_dom0',
9839                      'vg_size', 'vg_free', 'cpu_total']:
9840           if attr not in remote_info:
9841             raise errors.OpExecError("Node '%s' didn't return attribute"
9842                                      " '%s'" % (nname, attr))
9843           if not isinstance(remote_info[attr], int):
9844             raise errors.OpExecError("Node '%s' returned invalid value"
9845                                      " for '%s': %s" %
9846                                      (nname, attr, remote_info[attr]))
9847         # compute memory used by primary instances
9848         i_p_mem = i_p_up_mem = 0
9849         for iinfo, beinfo in i_list:
9850           if iinfo.primary_node == nname:
9851             i_p_mem += beinfo[constants.BE_MEMORY]
9852             if iinfo.name not in node_iinfo[nname].payload:
9853               i_used_mem = 0
9854             else:
9855               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9856             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9857             remote_info['memory_free'] -= max(0, i_mem_diff)
9858
9859             if iinfo.admin_up:
9860               i_p_up_mem += beinfo[constants.BE_MEMORY]
9861
9862         # compute memory used by instances
9863         pnr_dyn = {
9864           "total_memory": remote_info['memory_total'],
9865           "reserved_memory": remote_info['memory_dom0'],
9866           "free_memory": remote_info['memory_free'],
9867           "total_disk": remote_info['vg_size'],
9868           "free_disk": remote_info['vg_free'],
9869           "total_cpus": remote_info['cpu_total'],
9870           "i_pri_memory": i_p_mem,
9871           "i_pri_up_memory": i_p_up_mem,
9872           }
9873         pnr.update(pnr_dyn)
9874
9875       node_results[nname] = pnr
9876     data["nodes"] = node_results
9877
9878     # instance data
9879     instance_data = {}
9880     for iinfo, beinfo in i_list:
9881       nic_data = []
9882       for nic in iinfo.nics:
9883         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
9884         nic_dict = {"mac": nic.mac,
9885                     "ip": nic.ip,
9886                     "mode": filled_params[constants.NIC_MODE],
9887                     "link": filled_params[constants.NIC_LINK],
9888                    }
9889         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9890           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9891         nic_data.append(nic_dict)
9892       pir = {
9893         "tags": list(iinfo.GetTags()),
9894         "admin_up": iinfo.admin_up,
9895         "vcpus": beinfo[constants.BE_VCPUS],
9896         "memory": beinfo[constants.BE_MEMORY],
9897         "os": iinfo.os,
9898         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9899         "nics": nic_data,
9900         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9901         "disk_template": iinfo.disk_template,
9902         "hypervisor": iinfo.hypervisor,
9903         }
9904       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9905                                                  pir["disks"])
9906       instance_data[iinfo.name] = pir
9907
9908     data["instances"] = instance_data
9909
9910     self.in_data = data
9911
9912   def _AddNewInstance(self):
9913     """Add new instance data to allocator structure.
9914
9915     This in combination with _AllocatorGetClusterData will create the
9916     correct structure needed as input for the allocator.
9917
9918     The checks for the completeness of the opcode must have already been
9919     done.
9920
9921     """
9922     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9923
9924     if self.disk_template in constants.DTS_NET_MIRROR:
9925       self.required_nodes = 2
9926     else:
9927       self.required_nodes = 1
9928     request = {
9929       "name": self.name,
9930       "disk_template": self.disk_template,
9931       "tags": self.tags,
9932       "os": self.os,
9933       "vcpus": self.vcpus,
9934       "memory": self.mem_size,
9935       "disks": self.disks,
9936       "disk_space_total": disk_space,
9937       "nics": self.nics,
9938       "required_nodes": self.required_nodes,
9939       }
9940     return request
9941
9942   def _AddRelocateInstance(self):
9943     """Add relocate instance data to allocator structure.
9944
9945     This in combination with _IAllocatorGetClusterData will create the
9946     correct structure needed as input for the allocator.
9947
9948     The checks for the completeness of the opcode must have already been
9949     done.
9950
9951     """
9952     instance = self.cfg.GetInstanceInfo(self.name)
9953     if instance is None:
9954       raise errors.ProgrammerError("Unknown instance '%s' passed to"
9955                                    " IAllocator" % self.name)
9956
9957     if instance.disk_template not in constants.DTS_NET_MIRROR:
9958       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9959                                  errors.ECODE_INVAL)
9960
9961     if len(instance.secondary_nodes) != 1:
9962       raise errors.OpPrereqError("Instance has not exactly one secondary node",
9963                                  errors.ECODE_STATE)
9964
9965     self.required_nodes = 1
9966     disk_sizes = [{'size': disk.size} for disk in instance.disks]
9967     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9968
9969     request = {
9970       "name": self.name,
9971       "disk_space_total": disk_space,
9972       "required_nodes": self.required_nodes,
9973       "relocate_from": self.relocate_from,
9974       }
9975     return request
9976
9977   def _AddEvacuateNodes(self):
9978     """Add evacuate nodes data to allocator structure.
9979
9980     """
9981     request = {
9982       "evac_nodes": self.evac_nodes
9983       }
9984     return request
9985
9986   def _BuildInputData(self, fn):
9987     """Build input data structures.
9988
9989     """
9990     self._ComputeClusterData()
9991
9992     request = fn()
9993     request["type"] = self.mode
9994     self.in_data["request"] = request
9995
9996     self.in_text = serializer.Dump(self.in_data)
9997
9998   def Run(self, name, validate=True, call_fn=None):
9999     """Run an instance allocator and return the results.
10000
10001     """
10002     if call_fn is None:
10003       call_fn = self.rpc.call_iallocator_runner
10004
10005     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10006     result.Raise("Failure while running the iallocator script")
10007
10008     self.out_text = result.payload
10009     if validate:
10010       self._ValidateResult()
10011
10012   def _ValidateResult(self):
10013     """Process the allocator results.
10014
10015     This will process and if successful save the result in
10016     self.out_data and the other parameters.
10017
10018     """
10019     try:
10020       rdict = serializer.Load(self.out_text)
10021     except Exception, err:
10022       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10023
10024     if not isinstance(rdict, dict):
10025       raise errors.OpExecError("Can't parse iallocator results: not a dict")
10026
10027     # TODO: remove backwards compatiblity in later versions
10028     if "nodes" in rdict and "result" not in rdict:
10029       rdict["result"] = rdict["nodes"]
10030       del rdict["nodes"]
10031
10032     for key in "success", "info", "result":
10033       if key not in rdict:
10034         raise errors.OpExecError("Can't parse iallocator results:"
10035                                  " missing key '%s'" % key)
10036       setattr(self, key, rdict[key])
10037
10038     if not isinstance(rdict["result"], list):
10039       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10040                                " is not a list")
10041     self.out_data = rdict
10042
10043
10044 class LUTestAllocator(NoHooksLU):
10045   """Run allocator tests.
10046
10047   This LU runs the allocator tests
10048
10049   """
10050   _OP_REQP = [
10051     ("direction", _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10052     ("mode", _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10053     ("name", _TNonEmptyString),
10054     ("nics", _TOr(_TNone, _TListOf(
10055       _TDictOf(_TElemOf(["mac", "ip", "bridge"]), _TNonEmptyString)))),
10056     ("disks", _TOr(_TNone, _TList)),
10057     ]
10058   _OP_DEFS = [
10059     ("hypervisor", None),
10060     ("allocator", None),
10061     ("nics", None),
10062     ("disks", None),
10063     ]
10064
10065   def CheckPrereq(self):
10066     """Check prerequisites.
10067
10068     This checks the opcode parameters depending on the director and mode test.
10069
10070     """
10071     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10072       for attr in ["mem_size", "disks", "disk_template",
10073                    "os", "tags", "nics", "vcpus"]:
10074         if not hasattr(self.op, attr):
10075           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10076                                      attr, errors.ECODE_INVAL)
10077       iname = self.cfg.ExpandInstanceName(self.op.name)
10078       if iname is not None:
10079         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10080                                    iname, errors.ECODE_EXISTS)
10081       if not isinstance(self.op.nics, list):
10082         raise errors.OpPrereqError("Invalid parameter 'nics'",
10083                                    errors.ECODE_INVAL)
10084       if not isinstance(self.op.disks, list):
10085         raise errors.OpPrereqError("Invalid parameter 'disks'",
10086                                    errors.ECODE_INVAL)
10087       for row in self.op.disks:
10088         if (not isinstance(row, dict) or
10089             "size" not in row or
10090             not isinstance(row["size"], int) or
10091             "mode" not in row or
10092             row["mode"] not in ['r', 'w']):
10093           raise errors.OpPrereqError("Invalid contents of the 'disks'"
10094                                      " parameter", errors.ECODE_INVAL)
10095       if self.op.hypervisor is None:
10096         self.op.hypervisor = self.cfg.GetHypervisorType()
10097     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10098       fname = _ExpandInstanceName(self.cfg, self.op.name)
10099       self.op.name = fname
10100       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10101     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10102       if not hasattr(self.op, "evac_nodes"):
10103         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10104                                    " opcode input", errors.ECODE_INVAL)
10105     else:
10106       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10107                                  self.op.mode, errors.ECODE_INVAL)
10108
10109     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10110       if self.op.allocator is None:
10111         raise errors.OpPrereqError("Missing allocator name",
10112                                    errors.ECODE_INVAL)
10113     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10114       raise errors.OpPrereqError("Wrong allocator test '%s'" %
10115                                  self.op.direction, errors.ECODE_INVAL)
10116
10117   def Exec(self, feedback_fn):
10118     """Run the allocator test.
10119
10120     """
10121     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10122       ial = IAllocator(self.cfg, self.rpc,
10123                        mode=self.op.mode,
10124                        name=self.op.name,
10125                        mem_size=self.op.mem_size,
10126                        disks=self.op.disks,
10127                        disk_template=self.op.disk_template,
10128                        os=self.op.os,
10129                        tags=self.op.tags,
10130                        nics=self.op.nics,
10131                        vcpus=self.op.vcpus,
10132                        hypervisor=self.op.hypervisor,
10133                        )
10134     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10135       ial = IAllocator(self.cfg, self.rpc,
10136                        mode=self.op.mode,
10137                        name=self.op.name,
10138                        relocate_from=list(self.relocate_from),
10139                        )
10140     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10141       ial = IAllocator(self.cfg, self.rpc,
10142                        mode=self.op.mode,
10143                        evac_nodes=self.op.evac_nodes)
10144     else:
10145       raise errors.ProgrammerError("Uncatched mode %s in"
10146                                    " LUTestAllocator.Exec", self.op.mode)
10147
10148     if self.op.direction == constants.IALLOCATOR_DIR_IN:
10149       result = ial.in_text
10150     else:
10151       ial.Run(self.op.allocator, validate=False)
10152       result = ial.out_text
10153     return result