code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0613,W0201
  25
  26 import os
  27 import os.path
  28 import time
  29 import re
  30 import platform
  31 import logging
  32 import copy
  33
  34 from ganeti import ssh
  35 from ganeti import utils
  36 from ganeti import errors
  37 from ganeti import hypervisor
  38 from ganeti import locking
  39 from ganeti import constants
  40 from ganeti import objects
  41 from ganeti import serializer
  42 from ganeti import ssconf
  43
  44
  45 class LogicalUnit(object):
  46   """Logical Unit base class.
  47
  48   Subclasses must follow these rules:
  49     - implement ExpandNames
  50     - implement CheckPrereq (except when tasklets are used)
  51     - implement Exec (except when tasklets are used)
  52     - implement BuildHooksEnv
  53     - redefine HPATH and HTYPE
  54     - optionally redefine their run requirements:
  55         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  56
  57   Note that all commands require root permissions.
  58
  59   @ivar dry_run_result: the value (if any) that will be returned to the caller
  60       in dry-run mode (signalled by opcode dry_run parameter)
  61
  62   """
  63   HPATH = None
  64   HTYPE = None
  65   _OP_REQP = []
  66   REQ_BGL = True
  67
  68   def __init__(self, processor, op, context, rpc):
  69     """Constructor for LogicalUnit.
  70
  71     This needs to be overridden in derived classes in order to check op
  72     validity.
  73
  74     """
  75     self.proc = processor
  76     self.op = op
  77     self.cfg = context.cfg
  78     self.context = context
  79     self.rpc = rpc
  80     # Dicts used to declare locking needs to mcpu
  81     self.needed_locks = None
  82     self.acquired_locks = {}
  83     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
  84     self.add_locks = {}
  85     self.remove_locks = {}
  86     # Used to force good behavior when calling helper functions
  87     self.recalculate_locks = {}
  88     self.__ssh = None
  89     # logging
  90     self.LogWarning = processor.LogWarning
  91     self.LogInfo = processor.LogInfo
  92     self.LogStep = processor.LogStep
  93     # support for dry-run
  94     self.dry_run_result = None
  95
  96     # Tasklets
  97     self.tasklets = None
  98
  99     for attr_name in self._OP_REQP:
 100       attr_val = getattr(op, attr_name, None)
 101       if attr_val is None:
 102         raise errors.OpPrereqError("Required parameter '%s' missing" %
 103                                    attr_name)
 104
 105     self.CheckArguments()
 106
 107   def __GetSSH(self):
 108     """Returns the SshRunner object
 109
 110     """
 111     if not self.__ssh:
 112       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 113     return self.__ssh
 114
 115   ssh = property(fget=__GetSSH)
 116
 117   def CheckArguments(self):
 118     """Check syntactic validity for the opcode arguments.
 119
 120     This method is for doing a simple syntactic check and ensure
 121     validity of opcode parameters, without any cluster-related
 122     checks. While the same can be accomplished in ExpandNames and/or
 123     CheckPrereq, doing these separate is better because:
 124
 125       - ExpandNames is left as as purely a lock-related function
 126       - CheckPrereq is run after we have acquired locks (and possible
 127         waited for them)
 128
 129     The function is allowed to change the self.op attribute so that
 130     later methods can no longer worry about missing parameters.
 131
 132     """
 133     pass
 134
 135   def ExpandNames(self):
 136     """Expand names for this LU.
 137
 138     This method is called before starting to execute the opcode, and it should
 139     update all the parameters of the opcode to their canonical form (e.g. a
 140     short node name must be fully expanded after this method has successfully
 141     completed). This way locking, hooks, logging, ecc. can work correctly.
 142
 143     LUs which implement this method must also populate the self.needed_locks
 144     member, as a dict with lock levels as keys, and a list of needed lock names
 145     as values. Rules:
 146
 147       - use an empty dict if you don't need any lock
 148       - if you don't need any lock at a particular level omit that level
 149       - don't put anything for the BGL level
 150       - if you want all locks at a level use locking.ALL_SET as a value
 151
 152     If you need to share locks (rather than acquire them exclusively) at one
 153     level you can modify self.share_locks, setting a true value (usually 1) for
 154     that level. By default locks are not shared.
 155
 156     This function can also define a list of tasklets, which then will be
 157     executed in order instead of the usual LU-level CheckPrereq and Exec
 158     functions, if those are not defined by the LU.
 159
 160     Examples::
 161
 162       # Acquire all nodes and one instance
 163       self.needed_locks = {
 164         locking.LEVEL_NODE: locking.ALL_SET,
 165         locking.LEVEL_INSTANCE: ['instance1.example.tld'],
 166       }
 167       # Acquire just two nodes
 168       self.needed_locks = {
 169         locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
 170       }
 171       # Acquire no locks
 172       self.needed_locks = {} # No, you can't leave it to the default value None
 173
 174     """
 175     # The implementation of this method is mandatory only if the new LU is
 176     # concurrent, so that old LUs don't need to be changed all at the same
 177     # time.
 178     if self.REQ_BGL:
 179       self.needed_locks = {} # Exclusive LUs don't need locks.
 180     else:
 181       raise NotImplementedError
 182
 183   def DeclareLocks(self, level):
 184     """Declare LU locking needs for a level
 185
 186     While most LUs can just declare their locking needs at ExpandNames time,
 187     sometimes there's the need to calculate some locks after having acquired
 188     the ones before. This function is called just before acquiring locks at a
 189     particular level, but after acquiring the ones at lower levels, and permits
 190     such calculations. It can be used to modify self.needed_locks, and by
 191     default it does nothing.
 192
 193     This function is only called if you have something already set in
 194     self.needed_locks for the level.
 195
 196     @param level: Locking level which is going to be locked
 197     @type level: member of ganeti.locking.LEVELS
 198
 199     """
 200
 201   def CheckPrereq(self):
 202     """Check prerequisites for this LU.
 203
 204     This method should check that the prerequisites for the execution
 205     of this LU are fulfilled. It can do internode communication, but
 206     it should be idempotent - no cluster or system changes are
 207     allowed.
 208
 209     The method should raise errors.OpPrereqError in case something is
 210     not fulfilled. Its return value is ignored.
 211
 212     This method should also update all the parameters of the opcode to
 213     their canonical form if it hasn't been done by ExpandNames before.
 214
 215     """
 216     if self.tasklets is not None:
 217       for (idx, tl) in enumerate(self.tasklets):
 218         logging.debug("Checking prerequisites for tasklet %s/%s",
 219                       idx + 1, len(self.tasklets))
 220         tl.CheckPrereq()
 221     else:
 222       raise NotImplementedError
 223
 224   def Exec(self, feedback_fn):
 225     """Execute the LU.
 226
 227     This method should implement the actual work. It should raise
 228     errors.OpExecError for failures that are somewhat dealt with in
 229     code, or expected.
 230
 231     """
 232     if self.tasklets is not None:
 233       for (idx, tl) in enumerate(self.tasklets):
 234         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 235         tl.Exec(feedback_fn)
 236     else:
 237       raise NotImplementedError
 238
 239   def BuildHooksEnv(self):
 240     """Build hooks environment for this LU.
 241
 242     This method should return a three-node tuple consisting of: a dict
 243     containing the environment that will be used for running the
 244     specific hook for this LU, a list of node names on which the hook
 245     should run before the execution, and a list of node names on which
 246     the hook should run after the execution.
 247
 248     The keys of the dict must not have 'GANETI_' prefixed as this will
 249     be handled in the hooks runner. Also note additional keys will be
 250     added by the hooks runner. If the LU doesn't define any
 251     environment, an empty dict (and not None) should be returned.
 252
 253     No nodes should be returned as an empty list (and not None).
 254
 255     Note that if the HPATH for a LU class is None, this function will
 256     not be called.
 257
 258     """
 259     raise NotImplementedError
 260
 261   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 262     """Notify the LU about the results of its hooks.
 263
 264     This method is called every time a hooks phase is executed, and notifies
 265     the Logical Unit about the hooks' result. The LU can then use it to alter
 266     its result based on the hooks.  By default the method does nothing and the
 267     previous result is passed back unchanged but any LU can define it if it
 268     wants to use the local cluster hook-scripts somehow.
 269
 270     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 271         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 272     @param hook_results: the results of the multi-node hooks rpc call
 273     @param feedback_fn: function used send feedback back to the caller
 274     @param lu_result: the previous Exec result this LU had, or None
 275         in the PRE phase
 276     @return: the new Exec result, based on the previous result
 277         and hook results
 278
 279     """
 280     return lu_result
 281
 282   def _ExpandAndLockInstance(self):
 283     """Helper function to expand and lock an instance.
 284
 285     Many LUs that work on an instance take its name in self.op.instance_name
 286     and need to expand it and then declare the expanded name for locking. This
 287     function does it, and then updates self.op.instance_name to the expanded
 288     name. It also initializes needed_locks as a dict, if this hasn't been done
 289     before.
 290
 291     """
 292     if self.needed_locks is None:
 293       self.needed_locks = {}
 294     else:
 295       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 296         "_ExpandAndLockInstance called with instance-level locks set"
 297     expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
 298     if expanded_name is None:
 299       raise errors.OpPrereqError("Instance '%s' not known" %
 300                                   self.op.instance_name)
 301     self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
 302     self.op.instance_name = expanded_name
 303
 304   def _LockInstancesNodes(self, primary_only=False):
 305     """Helper function to declare instances' nodes for locking.
 306
 307     This function should be called after locking one or more instances to lock
 308     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 309     with all primary or secondary nodes for instances already locked and
 310     present in self.needed_locks[locking.LEVEL_INSTANCE].
 311
 312     It should be called from DeclareLocks, and for safety only works if
 313     self.recalculate_locks[locking.LEVEL_NODE] is set.
 314
 315     In the future it may grow parameters to just lock some instance's nodes, or
 316     to just lock primaries or secondary nodes, if needed.
 317
 318     If should be called in DeclareLocks in a way similar to::
 319
 320       if level == locking.LEVEL_NODE:
 321         self._LockInstancesNodes()
 322
 323     @type primary_only: boolean
 324     @param primary_only: only lock primary nodes of locked instances
 325
 326     """
 327     assert locking.LEVEL_NODE in self.recalculate_locks, \
 328       "_LockInstancesNodes helper function called with no nodes to recalculate"
 329
 330     # TODO: check if we're really been called with the instance locks held
 331
 332     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 333     # future we might want to have different behaviors depending on the value
 334     # of self.recalculate_locks[locking.LEVEL_NODE]
 335     wanted_nodes = []
 336     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 337       instance = self.context.cfg.GetInstanceInfo(instance_name)
 338       wanted_nodes.append(instance.primary_node)
 339       if not primary_only:
 340         wanted_nodes.extend(instance.secondary_nodes)
 341
 342     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 343       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 344     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 345       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 346
 347     del self.recalculate_locks[locking.LEVEL_NODE]
 348
 349
 350 class NoHooksLU(LogicalUnit):
 351   """Simple LU which runs no hooks.
 352
 353   This LU is intended as a parent for other LogicalUnits which will
 354   run no hooks, in order to reduce duplicate code.
 355
 356   """
 357   HPATH = None
 358   HTYPE = None
 359
 360
 361 class Tasklet:
 362   """Tasklet base class.
 363
 364   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 365   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 366   tasklets know nothing about locks.
 367
 368   Subclasses must follow these rules:
 369     - Implement CheckPrereq
 370     - Implement Exec
 371
 372   """
 373   def __init__(self, lu):
 374     self.lu = lu
 375
 376     # Shortcuts
 377     self.cfg = lu.cfg
 378     self.rpc = lu.rpc
 379
 380   def CheckPrereq(self):
 381     """Check prerequisites for this tasklets.
 382
 383     This method should check whether the prerequisites for the execution of
 384     this tasklet are fulfilled. It can do internode communication, but it
 385     should be idempotent - no cluster or system changes are allowed.
 386
 387     The method should raise errors.OpPrereqError in case something is not
 388     fulfilled. Its return value is ignored.
 389
 390     This method should also update all parameters to their canonical form if it
 391     hasn't been done before.
 392
 393     """
 394     raise NotImplementedError
 395
 396   def Exec(self, feedback_fn):
 397     """Execute the tasklet.
 398
 399     This method should implement the actual work. It should raise
 400     errors.OpExecError for failures that are somewhat dealt with in code, or
 401     expected.
 402
 403     """
 404     raise NotImplementedError
 405
 406
 407 def _GetWantedNodes(lu, nodes):
 408   """Returns list of checked and expanded node names.
 409
 410   @type lu: L{LogicalUnit}
 411   @param lu: the logical unit on whose behalf we execute
 412   @type nodes: list
 413   @param nodes: list of node names or None for all nodes
 414   @rtype: list
 415   @return: the list of nodes, sorted
 416   @raise errors.OpProgrammerError: if the nodes parameter is wrong type
 417
 418   """
 419   if not isinstance(nodes, list):
 420     raise errors.OpPrereqError("Invalid argument type 'nodes'")
 421
 422   if not nodes:
 423     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 424       " non-empty list of nodes whose name is to be expanded.")
 425
 426   wanted = []
 427   for name in nodes:
 428     node = lu.cfg.ExpandNodeName(name)
 429     if node is None:
 430       raise errors.OpPrereqError("No such node name '%s'" % name)
 431     wanted.append(node)
 432
 433   return utils.NiceSort(wanted)
 434
 435
 436 def _GetWantedInstances(lu, instances):
 437   """Returns list of checked and expanded instance names.
 438
 439   @type lu: L{LogicalUnit}
 440   @param lu: the logical unit on whose behalf we execute
 441   @type instances: list
 442   @param instances: list of instance names or None for all instances
 443   @rtype: list
 444   @return: the list of instances, sorted
 445   @raise errors.OpPrereqError: if the instances parameter is wrong type
 446   @raise errors.OpPrereqError: if any of the passed instances is not found
 447
 448   """
 449   if not isinstance(instances, list):
 450     raise errors.OpPrereqError("Invalid argument type 'instances'")
 451
 452   if instances:
 453     wanted = []
 454
 455     for name in instances:
 456       instance = lu.cfg.ExpandInstanceName(name)
 457       if instance is None:
 458         raise errors.OpPrereqError("No such instance name '%s'" % name)
 459       wanted.append(instance)
 460
 461   else:
 462     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 463   return wanted
 464
 465
 466 def _CheckOutputFields(static, dynamic, selected):
 467   """Checks whether all selected fields are valid.
 468
 469   @type static: L{utils.FieldSet}
 470   @param static: static fields set
 471   @type dynamic: L{utils.FieldSet}
 472   @param dynamic: dynamic fields set
 473
 474   """
 475   f = utils.FieldSet()
 476   f.Extend(static)
 477   f.Extend(dynamic)
 478
 479   delta = f.NonMatching(selected)
 480   if delta:
 481     raise errors.OpPrereqError("Unknown output fields selected: %s"
 482                                % ",".join(delta))
 483
 484
 485 def _CheckBooleanOpField(op, name):
 486   """Validates boolean opcode parameters.
 487
 488   This will ensure that an opcode parameter is either a boolean value,
 489   or None (but that it always exists).
 490
 491   """
 492   val = getattr(op, name, None)
 493   if not (val is None or isinstance(val, bool)):
 494     raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
 495                                (name, str(val)))
 496   setattr(op, name, val)
 497
 498
 499 def _CheckNodeOnline(lu, node):
 500   """Ensure that a given node is online.
 501
 502   @param lu: the LU on behalf of which we make the check
 503   @param node: the node to check
 504   @raise errors.OpPrereqError: if the node is offline
 505
 506   """
 507   if lu.cfg.GetNodeInfo(node).offline:
 508     raise errors.OpPrereqError("Can't use offline node %s" % node)
 509
 510
 511 def _CheckNodeNotDrained(lu, node):
 512   """Ensure that a given node is not drained.
 513
 514   @param lu: the LU on behalf of which we make the check
 515   @param node: the node to check
 516   @raise errors.OpPrereqError: if the node is drained
 517
 518   """
 519   if lu.cfg.GetNodeInfo(node).drained:
 520     raise errors.OpPrereqError("Can't use drained node %s" % node)
 521
 522
 523 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 524                           memory, vcpus, nics, disk_template, disks,
 525                           bep, hvp, hypervisor_name):
 526   """Builds instance related env variables for hooks
 527
 528   This builds the hook environment from individual variables.
 529
 530   @type name: string
 531   @param name: the name of the instance
 532   @type primary_node: string
 533   @param primary_node: the name of the instance's primary node
 534   @type secondary_nodes: list
 535   @param secondary_nodes: list of secondary nodes as strings
 536   @type os_type: string
 537   @param os_type: the name of the instance's OS
 538   @type status: boolean
 539   @param status: the should_run status of the instance
 540   @type memory: string
 541   @param memory: the memory size of the instance
 542   @type vcpus: string
 543   @param vcpus: the count of VCPUs the instance has
 544   @type nics: list
 545   @param nics: list of tuples (ip, mac, mode, link) representing
 546       the NICs the instance has
 547   @type disk_template: string
 548   @param disk_template: the disk template of the instance
 549   @type disks: list
 550   @param disks: the list of (size, mode) pairs
 551   @type bep: dict
 552   @param bep: the backend parameters for the instance
 553   @type hvp: dict
 554   @param hvp: the hypervisor parameters for the instance
 555   @type hypervisor_name: string
 556   @param hypervisor_name: the hypervisor for the instance
 557   @rtype: dict
 558   @return: the hook environment for this instance
 559
 560   """
 561   if status:
 562     str_status = "up"
 563   else:
 564     str_status = "down"
 565   env = {
 566     "OP_TARGET": name,
 567     "INSTANCE_NAME": name,
 568     "INSTANCE_PRIMARY": primary_node,
 569     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 570     "INSTANCE_OS_TYPE": os_type,
 571     "INSTANCE_STATUS": str_status,
 572     "INSTANCE_MEMORY": memory,
 573     "INSTANCE_VCPUS": vcpus,
 574     "INSTANCE_DISK_TEMPLATE": disk_template,
 575     "INSTANCE_HYPERVISOR": hypervisor_name,
 576   }
 577
 578   if nics:
 579     nic_count = len(nics)
 580     for idx, (ip, mac, mode, link) in enumerate(nics):
 581       if ip is None:
 582         ip = ""
 583       env["INSTANCE_NIC%d_IP" % idx] = ip
 584       env["INSTANCE_NIC%d_MAC" % idx] = mac
 585       env["INSTANCE_NIC%d_MODE" % idx] = mode
 586       env["INSTANCE_NIC%d_LINK" % idx] = link
 587       if mode == constants.NIC_MODE_BRIDGED:
 588         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 589   else:
 590     nic_count = 0
 591
 592   env["INSTANCE_NIC_COUNT"] = nic_count
 593
 594   if disks:
 595     disk_count = len(disks)
 596     for idx, (size, mode) in enumerate(disks):
 597       env["INSTANCE_DISK%d_SIZE" % idx] = size
 598       env["INSTANCE_DISK%d_MODE" % idx] = mode
 599   else:
 600     disk_count = 0
 601
 602   env["INSTANCE_DISK_COUNT"] = disk_count
 603
 604   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 605     for key, value in source.items():
 606       env["INSTANCE_%s_%s" % (kind, key)] = value
 607
 608   return env
 609
 610
 611 def _NICListToTuple(lu, nics):
 612   """Build a list of nic information tuples.
 613
 614   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 615   value in LUQueryInstanceData.
 616
 617   @type lu:  L{LogicalUnit}
 618   @param lu: the logical unit on whose behalf we execute
 619   @type nics: list of L{objects.NIC}
 620   @param nics: list of nics to convert to hooks tuples
 621
 622   """
 623   hooks_nics = []
 624   c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
 625   for nic in nics:
 626     ip = nic.ip
 627     mac = nic.mac
 628     filled_params = objects.FillDict(c_nicparams, nic.nicparams)
 629     mode = filled_params[constants.NIC_MODE]
 630     link = filled_params[constants.NIC_LINK]
 631     hooks_nics.append((ip, mac, mode, link))
 632   return hooks_nics
 633
 634
 635 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 636   """Builds instance related env variables for hooks from an object.
 637
 638   @type lu: L{LogicalUnit}
 639   @param lu: the logical unit on whose behalf we execute
 640   @type instance: L{objects.Instance}
 641   @param instance: the instance for which we should build the
 642       environment
 643   @type override: dict
 644   @param override: dictionary with key/values that will override
 645       our values
 646   @rtype: dict
 647   @return: the hook environment dictionary
 648
 649   """
 650   cluster = lu.cfg.GetClusterInfo()
 651   bep = cluster.FillBE(instance)
 652   hvp = cluster.FillHV(instance)
 653   args = {
 654     'name': instance.name,
 655     'primary_node': instance.primary_node,
 656     'secondary_nodes': instance.secondary_nodes,
 657     'os_type': instance.os,
 658     'status': instance.admin_up,
 659     'memory': bep[constants.BE_MEMORY],
 660     'vcpus': bep[constants.BE_VCPUS],
 661     'nics': _NICListToTuple(lu, instance.nics),
 662     'disk_template': instance.disk_template,
 663     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 664     'bep': bep,
 665     'hvp': hvp,
 666     'hypervisor_name': instance.hypervisor,
 667   }
 668   if override:
 669     args.update(override)
 670   return _BuildInstanceHookEnv(**args)
 671
 672
 673 def _AdjustCandidatePool(lu):
 674   """Adjust the candidate pool after node operations.
 675
 676   """
 677   mod_list = lu.cfg.MaintainCandidatePool()
 678   if mod_list:
 679     lu.LogInfo("Promoted nodes to master candidate role: %s",
 680                ", ".join(node.name for node in mod_list))
 681     for name in mod_list:
 682       lu.context.ReaddNode(name)
 683   mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
 684   if mc_now > mc_max:
 685     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 686                (mc_now, mc_max))
 687
 688
 689 def _CheckNicsBridgesExist(lu, target_nics, target_node,
 690                                profile=constants.PP_DEFAULT):
 691   """Check that the brigdes needed by a list of nics exist.
 692
 693   """
 694   c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
 695   paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
 696                 for nic in target_nics]
 697   brlist = [params[constants.NIC_LINK] for params in paramslist
 698             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 699   if brlist:
 700     result = lu.rpc.call_bridges_exist(target_node, brlist)
 701     result.Raise("Error checking bridges on destination node '%s'" %
 702                  target_node, prereq=True)
 703
 704
 705 def _CheckInstanceBridgesExist(lu, instance, node=None):
 706   """Check that the brigdes needed by an instance exist.
 707
 708   """
 709   if node is None:
 710     node = instance.primary_node
 711   _CheckNicsBridgesExist(lu, instance.nics, node)
 712
 713
 714 def _GetNodeInstancesInner(cfg, fn):
 715   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
 716
 717
 718 def _GetNodeInstances(cfg, node_name):
 719   """Returns a list of all primary and secondary instances on a node.
 720
 721   """
 722
 723   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
 724
 725
 726 def _GetNodePrimaryInstances(cfg, node_name):
 727   """Returns primary instances on a node.
 728
 729   """
 730   return _GetNodeInstancesInner(cfg,
 731                                 lambda inst: node_name == inst.primary_node)
 732
 733
 734 def _GetNodeSecondaryInstances(cfg, node_name):
 735   """Returns secondary instances on a node.
 736
 737   """
 738   return _GetNodeInstancesInner(cfg,
 739                                 lambda inst: node_name in inst.secondary_nodes)
 740
 741
 742 def _GetStorageTypeArgs(cfg, storage_type):
 743   """Returns the arguments for a storage type.
 744
 745   """
 746   # Special case for file storage
 747   if storage_type == constants.ST_FILE:
 748     # storage.FileStorage wants a list of storage directories
 749     return [[cfg.GetFileStorageDir()]]
 750
 751   return []
 752
 753
 754 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
 755   faulty = []
 756
 757   for dev in instance.disks:
 758     cfg.SetDiskID(dev, node_name)
 759
 760   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
 761   result.Raise("Failed to get disk status from node %s" % node_name,
 762                prereq=prereq)
 763
 764   for idx, bdev_status in enumerate(result.payload):
 765     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
 766       faulty.append(idx)
 767
 768   return faulty
 769
 770
 771 class LUPostInitCluster(LogicalUnit):
 772   """Logical unit for running hooks after cluster initialization.
 773
 774   """
 775   HPATH = "cluster-init"
 776   HTYPE = constants.HTYPE_CLUSTER
 777   _OP_REQP = []
 778
 779   def BuildHooksEnv(self):
 780     """Build hooks env.
 781
 782     """
 783     env = {"OP_TARGET": self.cfg.GetClusterName()}
 784     mn = self.cfg.GetMasterNode()
 785     return env, [], [mn]
 786
 787   def CheckPrereq(self):
 788     """No prerequisites to check.
 789
 790     """
 791     return True
 792
 793   def Exec(self, feedback_fn):
 794     """Nothing to do.
 795
 796     """
 797     return True
 798
 799
 800 class LUDestroyCluster(LogicalUnit):
 801   """Logical unit for destroying the cluster.
 802
 803   """
 804   HPATH = "cluster-destroy"
 805   HTYPE = constants.HTYPE_CLUSTER
 806   _OP_REQP = []
 807
 808   def BuildHooksEnv(self):
 809     """Build hooks env.
 810
 811     """
 812     env = {"OP_TARGET": self.cfg.GetClusterName()}
 813     return env, [], []
 814
 815   def CheckPrereq(self):
 816     """Check prerequisites.
 817
 818     This checks whether the cluster is empty.
 819
 820     Any errors are signaled by raising errors.OpPrereqError.
 821
 822     """
 823     master = self.cfg.GetMasterNode()
 824
 825     nodelist = self.cfg.GetNodeList()
 826     if len(nodelist) != 1 or nodelist[0] != master:
 827       raise errors.OpPrereqError("There are still %d node(s) in"
 828                                  " this cluster." % (len(nodelist) - 1))
 829     instancelist = self.cfg.GetInstanceList()
 830     if instancelist:
 831       raise errors.OpPrereqError("There are still %d instance(s) in"
 832                                  " this cluster." % len(instancelist))
 833
 834   def Exec(self, feedback_fn):
 835     """Destroys the cluster.
 836
 837     """
 838     master = self.cfg.GetMasterNode()
 839
 840     # Run post hooks on master node before it's removed
 841     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
 842     try:
 843       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
 844     except:
 845       self.LogWarning("Errors occurred running hooks on %s" % master)
 846
 847     result = self.rpc.call_node_stop_master(master, False)
 848     result.Raise("Could not disable the master role")
 849     priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
 850     utils.CreateBackup(priv_key)
 851     utils.CreateBackup(pub_key)
 852     return master
 853
 854
 855 class LUVerifyCluster(LogicalUnit):
 856   """Verifies the cluster status.
 857
 858   """
 859   HPATH = "cluster-verify"
 860   HTYPE = constants.HTYPE_CLUSTER
 861   _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
 862   REQ_BGL = False
 863
 864   TCLUSTER = "cluster"
 865   TNODE = "node"
 866   TINSTANCE = "instance"
 867
 868   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
 869   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
 870   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
 871   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
 872   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
 873   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
 874   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
 875   ENODEDRBD = (TNODE, "ENODEDRBD")
 876   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
 877   ENODEHOOKS = (TNODE, "ENODEHOOKS")
 878   ENODEHV = (TNODE, "ENODEHV")
 879   ENODELVM = (TNODE, "ENODELVM")
 880   ENODEN1 = (TNODE, "ENODEN1")
 881   ENODENET = (TNODE, "ENODENET")
 882   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
 883   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
 884   ENODERPC = (TNODE, "ENODERPC")
 885   ENODESSH = (TNODE, "ENODESSH")
 886   ENODEVERSION = (TNODE, "ENODEVERSION")
 887
 888   ETYPE_FIELD = "code"
 889   ETYPE_ERROR = "ERROR"
 890   ETYPE_WARNING = "WARNING"
 891
 892   def ExpandNames(self):
 893     self.needed_locks = {
 894       locking.LEVEL_NODE: locking.ALL_SET,
 895       locking.LEVEL_INSTANCE: locking.ALL_SET,
 896     }
 897     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
 898
 899   def _Error(self, ecode, item, msg, *args, **kwargs):
 900     """Format an error message.
 901
 902     Based on the opcode's error_codes parameter, either format a
 903     parseable error code, or a simpler error string.
 904
 905     This must be called only from Exec and functions called from Exec.
 906
 907     """
 908     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
 909     itype, etxt = ecode
 910     # first complete the msg
 911     if args:
 912       msg = msg % args
 913     # then format the whole message
 914     if self.op.error_codes:
 915       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
 916     else:
 917       if item:
 918         item = " " + item
 919       else:
 920         item = ""
 921       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
 922     # and finally report it via the feedback_fn
 923     self._feedback_fn("  - %s" % msg)
 924
 925   def _ErrorIf(self, cond, *args, **kwargs):
 926     """Log an error message if the passed condition is True.
 927
 928     """
 929     cond = bool(cond) or self.op.debug_simulate_errors
 930     if cond:
 931       self._Error(*args, **kwargs)
 932     # do not mark the operation as failed for WARN cases only
 933     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
 934       self.bad = self.bad or cond
 935
 936   def _VerifyNode(self, nodeinfo, file_list, local_cksum,
 937                   node_result, master_files, drbd_map, vg_name):
 938     """Run multiple tests against a node.
 939
 940     Test list:
 941
 942       - compares ganeti version
 943       - checks vg existence and size > 20G
 944       - checks config file checksum
 945       - checks ssh to other nodes
 946
 947     @type nodeinfo: L{objects.Node}
 948     @param nodeinfo: the node to check
 949     @param file_list: required list of files
 950     @param local_cksum: dictionary of local files and their checksums
 951     @param node_result: the results from the node
 952     @param master_files: list of files that only masters should have
 953     @param drbd_map: the useddrbd minors for this node, in
 954         form of minor: (instance, must_exist) which correspond to instances
 955         and their running status
 956     @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
 957
 958     """
 959     node = nodeinfo.name
 960     _ErrorIf = self._ErrorIf
 961
 962     # main result, node_result should be a non-empty dict
 963     test = not node_result or not isinstance(node_result, dict)
 964     _ErrorIf(test, self.ENODERPC, node,
 965                   "unable to verify node: no data returned")
 966     if test:
 967       return
 968
 969     # compares ganeti version
 970     local_version = constants.PROTOCOL_VERSION
 971     remote_version = node_result.get('version', None)
 972     test = not (remote_version and
 973                 isinstance(remote_version, (list, tuple)) and
 974                 len(remote_version) == 2)
 975     _ErrorIf(test, self.ENODERPC, node,
 976              "connection to node returned invalid data")
 977     if test:
 978       return
 979
 980     test = local_version != remote_version[0]
 981     _ErrorIf(test, self.ENODEVERSION, node,
 982              "incompatible protocol versions: master %s,"
 983              " node %s", local_version, remote_version[0])
 984     if test:
 985       return
 986
 987     # node seems compatible, we can actually try to look into its results
 988
 989     # full package version
 990     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
 991                   self.ENODEVERSION, node,
 992                   "software version mismatch: master %s, node %s",
 993                   constants.RELEASE_VERSION, remote_version[1],
 994                   code=self.ETYPE_WARNING)
 995
 996     # checks vg existence and size > 20G
 997     if vg_name is not None:
 998       vglist = node_result.get(constants.NV_VGLIST, None)
 999       test = not vglist
1000       _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1001       if not test:
1002         vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1003                                               constants.MIN_VG_SIZE)
1004         _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1005
1006     # checks config file checksum
1007
1008     remote_cksum = node_result.get(constants.NV_FILELIST, None)
1009     test = not isinstance(remote_cksum, dict)
1010     _ErrorIf(test, self.ENODEFILECHECK, node,
1011              "node hasn't returned file checksum data")
1012     if not test:
1013       for file_name in file_list:
1014         node_is_mc = nodeinfo.master_candidate
1015         must_have = (file_name not in master_files) or node_is_mc
1016         # missing
1017         test1 = file_name not in remote_cksum
1018         # invalid checksum
1019         test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1020         # existing and good
1021         test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1022         _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1023                  "file '%s' missing", file_name)
1024         _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1025                  "file '%s' has wrong checksum", file_name)
1026         # not candidate and this is not a must-have file
1027         _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1028                  "file '%s' should not exist on non master"
1029                  " candidates (and the file is outdated)", file_name)
1030         # all good, except non-master/non-must have combination
1031         _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1032                  "file '%s' should not exist"
1033                  " on non master candidates", file_name)
1034
1035     # checks ssh to any
1036
1037     test = constants.NV_NODELIST not in node_result
1038     _ErrorIf(test, self.ENODESSH, node,
1039              "node hasn't returned node ssh connectivity data")
1040     if not test:
1041       if node_result[constants.NV_NODELIST]:
1042         for a_node, a_msg in node_result[constants.NV_NODELIST].items():
1043           _ErrorIf(True, self.ENODESSH, node,
1044                    "ssh communication with node '%s': %s", a_node, a_msg)
1045
1046     test = constants.NV_NODENETTEST not in node_result
1047     _ErrorIf(test, self.ENODENET, node,
1048              "node hasn't returned node tcp connectivity data")
1049     if not test:
1050       if node_result[constants.NV_NODENETTEST]:
1051         nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
1052         for anode in nlist:
1053           _ErrorIf(True, self.ENODENET, node,
1054                    "tcp communication with node '%s': %s",
1055                    anode, node_result[constants.NV_NODENETTEST][anode])
1056
1057     hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
1058     if isinstance(hyp_result, dict):
1059       for hv_name, hv_result in hyp_result.iteritems():
1060         test = hv_result is not None
1061         _ErrorIf(test, self.ENODEHV, node,
1062                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1063
1064     # check used drbd list
1065     if vg_name is not None:
1066       used_minors = node_result.get(constants.NV_DRBDLIST, [])
1067       test = not isinstance(used_minors, (tuple, list))
1068       _ErrorIf(test, self.ENODEDRBD, node,
1069                "cannot parse drbd status file: %s", str(used_minors))
1070       if not test:
1071         for minor, (iname, must_exist) in drbd_map.items():
1072           test = minor not in used_minors and must_exist
1073           _ErrorIf(test, self.ENODEDRBD, node,
1074                    "drbd minor %d of instance %s is not active",
1075                    minor, iname)
1076         for minor in used_minors:
1077           test = minor not in drbd_map
1078           _ErrorIf(test, self.ENODEDRBD, node,
1079                    "unallocated drbd minor %d is in use", minor)
1080
1081   def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
1082                       node_instance, n_offline):
1083     """Verify an instance.
1084
1085     This function checks to see if the required block devices are
1086     available on the instance's node.
1087
1088     """
1089     _ErrorIf = self._ErrorIf
1090     node_current = instanceconfig.primary_node
1091
1092     node_vol_should = {}
1093     instanceconfig.MapLVsByNode(node_vol_should)
1094
1095     for node in node_vol_should:
1096       if node in n_offline:
1097         # ignore missing volumes on offline nodes
1098         continue
1099       for volume in node_vol_should[node]:
1100         test = node not in node_vol_is or volume not in node_vol_is[node]
1101         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1102                  "volume %s missing on node %s", volume, node)
1103
1104     if instanceconfig.admin_up:
1105       test = ((node_current not in node_instance or
1106                not instance in node_instance[node_current]) and
1107               node_current not in n_offline)
1108       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1109                "instance not running on its primary node %s",
1110                node_current)
1111
1112     for node in node_instance:
1113       if (not node == node_current):
1114         test = instance in node_instance[node]
1115         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1116                  "instance should not run on node %s", node)
1117
1118   def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
1119     """Verify if there are any unknown volumes in the cluster.
1120
1121     The .os, .swap and backup volumes are ignored. All other volumes are
1122     reported as unknown.
1123
1124     """
1125     for node in node_vol_is:
1126       for volume in node_vol_is[node]:
1127         test = (node not in node_vol_should or
1128                 volume not in node_vol_should[node])
1129         self._ErrorIf(test, self.ENODEORPHANLV, node,
1130                       "volume %s is unknown", volume)
1131
1132   def _VerifyOrphanInstances(self, instancelist, node_instance):
1133     """Verify the list of running instances.
1134
1135     This checks what instances are running but unknown to the cluster.
1136
1137     """
1138     for node in node_instance:
1139       for o_inst in node_instance[node]:
1140         test = o_inst not in instancelist
1141         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1142                       "instance %s on node %s should not exist", o_inst, node)
1143
1144   def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
1145     """Verify N+1 Memory Resilience.
1146
1147     Check that if one single node dies we can still start all the instances it
1148     was primary for.
1149
1150     """
1151     for node, nodeinfo in node_info.iteritems():
1152       # This code checks that every node which is now listed as secondary has
1153       # enough memory to host all instances it is supposed to should a single
1154       # other node in the cluster fail.
1155       # FIXME: not ready for failover to an arbitrary node
1156       # FIXME: does not support file-backed instances
1157       # WARNING: we currently take into account down instances as well as up
1158       # ones, considering that even if they're down someone might want to start
1159       # them even in the event of a node failure.
1160       for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1161         needed_mem = 0
1162         for instance in instances:
1163           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1164           if bep[constants.BE_AUTO_BALANCE]:
1165             needed_mem += bep[constants.BE_MEMORY]
1166         test = nodeinfo['mfree'] < needed_mem
1167         self._ErrorIf(test, self.ENODEN1, node,
1168                       "not enough memory on to accommodate"
1169                       " failovers should peer node %s fail", prinode)
1170
1171   def CheckPrereq(self):
1172     """Check prerequisites.
1173
1174     Transform the list of checks we're going to skip into a set and check that
1175     all its members are valid.
1176
1177     """
1178     self.skip_set = frozenset(self.op.skip_checks)
1179     if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1180       raise errors.OpPrereqError("Invalid checks to be skipped specified")
1181
1182   def BuildHooksEnv(self):
1183     """Build hooks env.
1184
1185     Cluster-Verify hooks just ran in the post phase and their failure makes
1186     the output be logged in the verify output and the verification to fail.
1187
1188     """
1189     all_nodes = self.cfg.GetNodeList()
1190     env = {
1191       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1192       }
1193     for node in self.cfg.GetAllNodesInfo().values():
1194       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1195
1196     return env, [], all_nodes
1197
1198   def Exec(self, feedback_fn):
1199     """Verify integrity of cluster, performing various test on nodes.
1200
1201     """
1202     self.bad = False
1203     _ErrorIf = self._ErrorIf
1204     verbose = self.op.verbose
1205     self._feedback_fn = feedback_fn
1206     feedback_fn("* Verifying global settings")
1207     for msg in self.cfg.VerifyConfig():
1208       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1209
1210     vg_name = self.cfg.GetVGName()
1211     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1212     nodelist = utils.NiceSort(self.cfg.GetNodeList())
1213     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1214     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1215     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1216                         for iname in instancelist)
1217     i_non_redundant = [] # Non redundant instances
1218     i_non_a_balanced = [] # Non auto-balanced instances
1219     n_offline = [] # List of offline nodes
1220     n_drained = [] # List of nodes being drained
1221     node_volume = {}
1222     node_instance = {}
1223     node_info = {}
1224     instance_cfg = {}
1225
1226     # FIXME: verify OS list
1227     # do local checksums
1228     master_files = [constants.CLUSTER_CONF_FILE]
1229
1230     file_names = ssconf.SimpleStore().GetFileList()
1231     file_names.append(constants.SSL_CERT_FILE)
1232     file_names.append(constants.RAPI_CERT_FILE)
1233     file_names.extend(master_files)
1234
1235     local_checksums = utils.FingerprintFiles(file_names)
1236
1237     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1238     node_verify_param = {
1239       constants.NV_FILELIST: file_names,
1240       constants.NV_NODELIST: [node.name for node in nodeinfo
1241                               if not node.offline],
1242       constants.NV_HYPERVISOR: hypervisors,
1243       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1244                                   node.secondary_ip) for node in nodeinfo
1245                                  if not node.offline],
1246       constants.NV_INSTANCELIST: hypervisors,
1247       constants.NV_VERSION: None,
1248       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1249       }
1250     if vg_name is not None:
1251       node_verify_param[constants.NV_VGLIST] = None
1252       node_verify_param[constants.NV_LVLIST] = vg_name
1253       node_verify_param[constants.NV_DRBDLIST] = None
1254     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1255                                            self.cfg.GetClusterName())
1256
1257     cluster = self.cfg.GetClusterInfo()
1258     master_node = self.cfg.GetMasterNode()
1259     all_drbd_map = self.cfg.ComputeDRBDMap()
1260
1261     feedback_fn("* Verifying node status")
1262     for node_i in nodeinfo:
1263       node = node_i.name
1264
1265       if node_i.offline:
1266         if verbose:
1267           feedback_fn("* Skipping offline node %s" % (node,))
1268         n_offline.append(node)
1269         continue
1270
1271       if node == master_node:
1272         ntype = "master"
1273       elif node_i.master_candidate:
1274         ntype = "master candidate"
1275       elif node_i.drained:
1276         ntype = "drained"
1277         n_drained.append(node)
1278       else:
1279         ntype = "regular"
1280       if verbose:
1281         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1282
1283       msg = all_nvinfo[node].fail_msg
1284       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1285       if msg:
1286         continue
1287
1288       nresult = all_nvinfo[node].payload
1289       node_drbd = {}
1290       for minor, instance in all_drbd_map[node].items():
1291         test = instance not in instanceinfo
1292         _ErrorIf(test, self.ECLUSTERCFG, None,
1293                  "ghost instance '%s' in temporary DRBD map", instance)
1294           # ghost instance should not be running, but otherwise we
1295           # don't give double warnings (both ghost instance and
1296           # unallocated minor in use)
1297         if test:
1298           node_drbd[minor] = (instance, False)
1299         else:
1300           instance = instanceinfo[instance]
1301           node_drbd[minor] = (instance.name, instance.admin_up)
1302       self._VerifyNode(node_i, file_names, local_checksums,
1303                        nresult, master_files, node_drbd, vg_name)
1304
1305       lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1306       if vg_name is None:
1307         node_volume[node] = {}
1308       elif isinstance(lvdata, basestring):
1309         _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1310                  utils.SafeEncode(lvdata))
1311         node_volume[node] = {}
1312       elif not isinstance(lvdata, dict):
1313         _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1314         continue
1315       else:
1316         node_volume[node] = lvdata
1317
1318       # node_instance
1319       idata = nresult.get(constants.NV_INSTANCELIST, None)
1320       test = not isinstance(idata, list)
1321       _ErrorIf(test, self.ENODEHV, node,
1322                "rpc call to node failed (instancelist)")
1323       if test:
1324         continue
1325
1326       node_instance[node] = idata
1327
1328       # node_info
1329       nodeinfo = nresult.get(constants.NV_HVINFO, None)
1330       test = not isinstance(nodeinfo, dict)
1331       _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1332       if test:
1333         continue
1334
1335       try:
1336         node_info[node] = {
1337           "mfree": int(nodeinfo['memory_free']),
1338           "pinst": [],
1339           "sinst": [],
1340           # dictionary holding all instances this node is secondary for,
1341           # grouped by their primary node. Each key is a cluster node, and each
1342           # value is a list of instances which have the key as primary and the
1343           # current node as secondary.  this is handy to calculate N+1 memory
1344           # availability if you can only failover from a primary to its
1345           # secondary.
1346           "sinst-by-pnode": {},
1347         }
1348         # FIXME: devise a free space model for file based instances as well
1349         if vg_name is not None:
1350           test = (constants.NV_VGLIST not in nresult or
1351                   vg_name not in nresult[constants.NV_VGLIST])
1352           _ErrorIf(test, self.ENODELVM, node,
1353                    "node didn't return data for the volume group '%s'"
1354                    " - it is either missing or broken", vg_name)
1355           if test:
1356             continue
1357           node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1358       except (ValueError, KeyError):
1359         _ErrorIf(True, self.ENODERPC, node,
1360                  "node returned invalid nodeinfo, check lvm/hypervisor")
1361         continue
1362
1363     node_vol_should = {}
1364
1365     feedback_fn("* Verifying instance status")
1366     for instance in instancelist:
1367       if verbose:
1368         feedback_fn("* Verifying instance %s" % instance)
1369       inst_config = instanceinfo[instance]
1370       self._VerifyInstance(instance, inst_config, node_volume,
1371                            node_instance, n_offline)
1372       inst_nodes_offline = []
1373
1374       inst_config.MapLVsByNode(node_vol_should)
1375
1376       instance_cfg[instance] = inst_config
1377
1378       pnode = inst_config.primary_node
1379       _ErrorIf(pnode not in node_info and pnode not in n_offline,
1380                self.ENODERPC, pnode, "instance %s, connection to"
1381                " primary node failed", instance)
1382       if pnode in node_info:
1383         node_info[pnode]['pinst'].append(instance)
1384
1385       if pnode in n_offline:
1386         inst_nodes_offline.append(pnode)
1387
1388       # If the instance is non-redundant we cannot survive losing its primary
1389       # node, so we are not N+1 compliant. On the other hand we have no disk
1390       # templates with more than one secondary so that situation is not well
1391       # supported either.
1392       # FIXME: does not support file-backed instances
1393       if len(inst_config.secondary_nodes) == 0:
1394         i_non_redundant.append(instance)
1395       _ErrorIf(len(inst_config.secondary_nodes) > 1,
1396                self.EINSTANCELAYOUT, instance,
1397                "instance has multiple secondary nodes", code="WARNING")
1398
1399       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1400         i_non_a_balanced.append(instance)
1401
1402       for snode in inst_config.secondary_nodes:
1403         _ErrorIf(snode not in node_info and snode not in n_offline,
1404                  self.ENODERPC, snode,
1405                  "instance %s, connection to secondary node"
1406                  "failed", instance)
1407
1408         if snode in node_info:
1409           node_info[snode]['sinst'].append(instance)
1410           if pnode not in node_info[snode]['sinst-by-pnode']:
1411             node_info[snode]['sinst-by-pnode'][pnode] = []
1412           node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1413
1414         if snode in n_offline:
1415           inst_nodes_offline.append(snode)
1416
1417       # warn that the instance lives on offline nodes
1418       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1419                "instance lives on offline node(s) %s",
1420                ", ".join(inst_nodes_offline))
1421
1422     feedback_fn("* Verifying orphan volumes")
1423     self._VerifyOrphanVolumes(node_vol_should, node_volume)
1424
1425     feedback_fn("* Verifying remaining instances")
1426     self._VerifyOrphanInstances(instancelist, node_instance)
1427
1428     if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1429       feedback_fn("* Verifying N+1 Memory redundancy")
1430       self._VerifyNPlusOneMemory(node_info, instance_cfg)
1431
1432     feedback_fn("* Other Notes")
1433     if i_non_redundant:
1434       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1435                   % len(i_non_redundant))
1436
1437     if i_non_a_balanced:
1438       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1439                   % len(i_non_a_balanced))
1440
1441     if n_offline:
1442       feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1443
1444     if n_drained:
1445       feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1446
1447     return not self.bad
1448
1449   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1450     """Analyze the post-hooks' result
1451
1452     This method analyses the hook result, handles it, and sends some
1453     nicely-formatted feedback back to the user.
1454
1455     @param phase: one of L{constants.HOOKS_PHASE_POST} or
1456         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1457     @param hooks_results: the results of the multi-node hooks rpc call
1458     @param feedback_fn: function used send feedback back to the caller
1459     @param lu_result: previous Exec result
1460     @return: the new Exec result, based on the previous result
1461         and hook results
1462
1463     """
1464     # We only really run POST phase hooks, and are only interested in
1465     # their results
1466     if phase == constants.HOOKS_PHASE_POST:
1467       # Used to change hooks' output to proper indentation
1468       indent_re = re.compile('^', re.M)
1469       feedback_fn("* Hooks Results")
1470       assert hooks_results, "invalid result from hooks"
1471
1472       for node_name in hooks_results:
1473         show_node_header = True
1474         res = hooks_results[node_name]
1475         msg = res.fail_msg
1476         test = msg and not res.offline
1477         self._ErrorIf(test, self.ENODEHOOKS, node_name,
1478                       "Communication failure in hooks execution: %s", msg)
1479         if test:
1480           # override manually lu_result here as _ErrorIf only
1481           # overrides self.bad
1482           lu_result = 1
1483           continue
1484         for script, hkr, output in res.payload:
1485           test = hkr == constants.HKR_FAIL
1486           self._ErrorIf(test, self.ENODEHOOKS, node_name,
1487                         "Script %s failed, output:", script)
1488           if test:
1489             output = indent_re.sub('      ', output)
1490             feedback_fn("%s" % output)
1491             lu_result = 1
1492
1493       return lu_result
1494
1495
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks needed: the whole node and instance sets.

    Every lock level gets its share flag set to 1.

    """
    wanted = {}
    for level in (locking.LEVEL_NODE, locking.LEVEL_INSTANCE):
      wanted[level] = locking.ALL_SET
    self.needed_locks = wanted
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this operation.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only the instances which are marked as up and whose disk template is
    one of constants.DTS_NET_MIRROR are examined.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = []
    res_missing = {}
    result = (res_nodes, res_instances, res_missing)

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    # map each expected (node, volume) pair to the instance owning it
    nv_dict = {}
    for inst in instances:
      if not inst.admin_up:
        continue
      if inst.disk_template not in constants.DTS_NET_MIRROR:
        continue
      inst_lvs = {}
      inst.MapLVsByNode(inst_lvs)
      for node, vol_list in inst_lvs.items():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      for lv_name, lv_info in node_res.payload.items():
        (_, _, lv_online) = lv_info
        # the volume was found, hence it is no longer a candidate for
        # the list of the missing ones
        owner = nv_dict.pop((node, lv_name), None)
        if owner is None or lv_online:
          continue
        if owner.name not in res_instances:
          res_instances.append(owner.name)

    # whatever is left in nv_dict was not found on any node; group the
    # missing (node, volume) pairs by instance name
    for key, inst in nv_dict.items():
      res_missing.setdefault(inst.name, []).append(key)

    return result
1577
1578
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  The size recorded in the configuration for each disk is compared with
  the one reported by the primary node of the instance, and the
  configuration is updated in case of mismatch.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance names and declare the needed locks.

    If a list of instances is given only these instances are locked, and
    the node locks are computed later in L{DeclareLocks}; otherwise the
    whole node and instance sets are locked.

    @raise errors.OpPrereqError: if the 'instances' parameter is not a
        list, or if one of the names is not known

    """
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name)
        self.wanted_names.append(full_name)
      # the node locks are filled in by DeclareLocks, once the instance
      # locks are held
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    """Compute the node locks when an explicit instance list was given.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      # no explicit list was given: work on all the locked instances
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @param feedback_fn: feedback function (not used by this method)
    @rtype: list
    @return: a list of (instance name, disk index, new size) tuples, one
        for each disk whose recorded size was corrected

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    # group the disks by primary node, so that each node is asked once
    per_node_disks = {}
    for instance in self.wanted_instances:
      node_disks = per_node_disks.setdefault(instance.primary_node, [])
      for idx, disk in enumerate(instance.disks):
        node_disks.append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      # NOTE(review): the other RPC results in this file are read through
      # their 'payload' attribute; confirm that 'data' holds the list of
      # the sizes here and not the raw RPC answer
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # divide by 2**20 (presumably from bytes to the unit used by
        # disk.size - to be confirmed)
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance)
          changed.append((instance.name, idx, size))
    return changed
1670
1671
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  The new name is resolved in CheckPrereq; Exec then stops the master
  role, updates the configuration and the known hosts file, and starts
  the master role again.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes);
        both node lists contain only the master node

    """
    master_node = self.cfg.GetMasterNode()
    hooks_env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    return hooks_env, [master_node], [master_node]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved via L{utils.HostInfo}; the operation is refused
    if neither the name nor the IP would change, or if the new IP already
    answers on the node daemon port.

    @raise errors.OpPrereqError: if one of the checks above fails

    """
    resolved = utils.HostInfo(self.op.name)

    target_name = resolved.name
    target_ip = resolved.ip
    self.ip = target_ip
    current_name = self.cfg.GetClusterName()
    current_ip = self.cfg.GetMasterIP()

    if (target_name, target_ip) == (current_name, current_ip):
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed")
    if (target_ip != current_ip and
        utils.TcpPing(target_ip, constants.DEFAULT_NODED_PORT)):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 target_ip)

    # from here on the operation works with the resolved name
    self.op.name = target_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    @param feedback_fn: feedback function (not used by this method)

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      # the file was written locally, so the master itself is skipped
      targets = self.cfg.GetNodeList()
      if master in targets:
        targets.remove(master)
      upload = self.rpc.call_upload_file(targets,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in upload.iteritems():
        err = to_result.fail_msg
        if not err:
          continue
        self.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                             (constants.SSH_KNOWN_HOSTS_FILE, to_node, err))

    finally:
      # whatever happened above, try to bring the master role back
      start_result = self.rpc.call_node_start_master(master, False, False)
      start_err = start_result.fail_msg
      if start_err:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s",
                        start_err)
1752
1753
def _RecursiveCheckIfLVMBased(disk):
  """Tell whether a disk, or any disk below it, is lvm-based.

  The children are examined first (depth-first); the disk's own device
  type is only looked at when none of them matched.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  for child in disk.children or ():
    if _RecursiveCheckIfLVMBased(child):
      return True
  return disk.dev_type == constants.LD_LV
1768
1769
1770 class LUSetClusterParams(LogicalUnit):
1771   """Change the parameters of the cluster.
1772
1773   """
1774   HPATH = "cluster-modify"
1775   HTYPE = constants.HTYPE_CLUSTER
1776   _OP_REQP = []
1777   REQ_BGL = False
1778
1779   def CheckArguments(self):
1780     """Check parameters
1781
1782     """
1783     if not hasattr(self.op, "candidate_pool_size"):
1784       self.op.candidate_pool_size = None
1785     if self.op.candidate_pool_size is not None:
1786       try:
1787         self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1788       except (ValueError, TypeError), err:
1789         raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1790                                    str(err))
1791       if self.op.candidate_pool_size < 1:
1792         raise errors.OpPrereqError("At least one master candidate needed")
1793
1794   def ExpandNames(self):
1795     # FIXME: in the future maybe other cluster params won't require checking on
1796     # all nodes to be modified.
1797     self.needed_locks = {
1798       locking.LEVEL_NODE: locking.ALL_SET,
1799     }
1800     self.share_locks[locking.LEVEL_NODE] = 1
1801
1802   def BuildHooksEnv(self):
1803     """Build hooks env.
1804
1805     """
1806     env = {
1807       "OP_TARGET": self.cfg.GetClusterName(),
1808       "NEW_VG_NAME": self.op.vg_name,
1809       }
1810     mn = self.cfg.GetMasterNode()
1811     return env, [mn], [mn]
1812
1813   def CheckPrereq(self):
1814     """Check prerequisites.
1815
1816     This checks whether the given params don't conflict and
1817     if the given volume group is valid.
1818
1819     """
1820     if self.op.vg_name is not None and not self.op.vg_name:
1821       instances = self.cfg.GetAllInstancesInfo().values()
1822       for inst in instances:
1823         for disk in inst.disks:
1824           if _RecursiveCheckIfLVMBased(disk):
1825             raise errors.OpPrereqError("Cannot disable lvm storage while"
1826                                        " lvm-based instances exist")
1827
1828     node_list = self.acquired_locks[locking.LEVEL_NODE]
1829
1830     # if vg_name not None, checks given volume group on all nodes
1831     if self.op.vg_name:
1832       vglist = self.rpc.call_vg_list(node_list)
1833       for node in node_list:
1834         msg = vglist[node].fail_msg
1835         if msg:
1836           # ignoring down node
1837           self.LogWarning("Error while gathering data on node %s"
1838                           " (ignoring node): %s", node, msg)
1839           continue
1840         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1841                                               self.op.vg_name,
1842                                               constants.MIN_VG_SIZE)
1843         if vgstatus:
1844           raise errors.OpPrereqError("Error on node '%s': %s" %
1845                                      (node, vgstatus))
1846
1847     self.cluster = cluster = self.cfg.GetClusterInfo()
1848     # validate params changes
1849     if self.op.beparams:
1850       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1851       self.new_beparams = objects.FillDict(
1852         cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1853
1854     if self.op.nicparams:
1855       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1856       self.new_nicparams = objects.FillDict(
1857         cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1858       objects.NIC.CheckParameterSyntax(self.new_nicparams)
1859
1860     # hypervisor list/parameters
1861     self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1862     if self.op.hvparams:
1863       if not isinstance(self.op.hvparams, dict):
1864         raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1865       for hv_name, hv_dict in self.op.hvparams.items():
1866         if hv_name not in self.new_hvparams:
1867           self.new_hvparams[hv_name] = hv_dict
1868         else:
1869           self.new_hvparams[hv_name].update(hv_dict)
1870
1871     if self.op.enabled_hypervisors is not None:
1872       self.hv_list = self.op.enabled_hypervisors
1873       if not self.hv_list:
1874         raise errors.OpPrereqError("Enabled hypervisors list must contain at"
1875                                    " least one member")
1876       invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
1877       if invalid_hvs:
1878         raise errors.OpPrereqError("Enabled hypervisors contains invalid"
1879                                    " entries: %s" %
1880                                    utils.CommaJoin(invalid_hvs))
1881     else:
1882       self.hv_list = cluster.enabled_hypervisors
1883
1884     if self.op.hvparams or self.op.enabled_hypervisors is not None:
1885       # either the enabled list has changed, or the parameters have, validate
1886       for hv_name, hv_params in self.new_hvparams.items():
1887         if ((self.op.hvparams and hv_name in self.op.hvparams) or
1888             (self.op.enabled_hypervisors and
1889              hv_name in self.op.enabled_hypervisors)):
1890           # either this is a new hypervisor, or its parameters have changed
1891           hv_class = hypervisor.GetHypervisor(hv_name)
1892           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1893           hv_class.CheckParameterSyntax(hv_params)
1894           _CheckHVParams(self, node_list, hv_name, hv_params)
1895
1896   def Exec(self, feedback_fn):
1897     """Change the parameters of the cluster.
1898
1899     """
1900     if self.op.vg_name is not None:
1901       new_volume = self.op.vg_name
1902       if not new_volume:
1903         new_volume = None
1904       if new_volume != self.cfg.GetVGName():
1905         self.cfg.SetVGName(new_volume)
1906       else:
1907         feedback_fn("Cluster LVM configuration already in desired"
1908                     " state, not changing")
1909     if self.op.hvparams:
1910       self.cluster.hvparams = self.new_hvparams
1911     if self.op.enabled_hypervisors is not None:
1912       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1913     if self.op.beparams:
1914       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1915     if self.op.nicparams:
1916       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1917
1918     if self.op.candidate_pool_size is not None:
1919       self.cluster.candidate_pool_size = self.op.candidate_pool_size
1920       # we need to update the pool size here, otherwise the save will fail
1921       _AdjustCandidatePool(self)
1922
1923     self.cfg.Update(self.cluster)
1924
1925
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  Files which do not exist locally are skipped; a failed copy to a node
  only results in a warning.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  targets = lu.cfg.GetNodeList()
  if additional_nodes is not None:
    targets.extend(additional_nodes)
  master_name = master_info.name
  if master_name in targets:
    targets.remove(master_name)

  # 2. Gather files to distribute
  files = set([
    constants.ETC_HOSTS,
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.RAPI_CERT_FILE,
    constants.RAPI_USERS_FILE,
    constants.HMAC_CLUSTER_KEY,
    ])

  # every enabled hypervisor may contribute files of its own
  for hv_name in lu.cfg.GetClusterInfo().enabled_hypervisors:
    files.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in files:
    if not os.path.exists(fname):
      continue
    upload = lu.rpc.call_upload_file(targets, fname)
    for to_node, to_result in upload.items():
      err = to_result.fail_msg
      if err:
        lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                           (fname, to_node, err))
1967
1968
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU: it re-saves the cluster object and copies
  the ancillary files again.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node locks, in shared mode.

    """
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: feedback function (not used by this method)

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster)
    _RedistributeAncillaryFiles(self)
1995
1996
1997 def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1998   """Sleep and poll for an instance's disk to sync.
1999
2000   """
2001   if not instance.disks:
2002     return True
2003
2004   if not oneshot:
2005     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2006
2007   node = instance.primary_node
2008
2009   for dev in instance.disks:
2010     lu.cfg.SetDiskID(dev, node)
2011
2012   retries = 0
2013   degr_retries = 10 # in seconds, as we sleep 1 second each time
2014   while True:
2015     max_time = 0
2016     done = True
2017     cumul_degraded = False
2018     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2019     msg = rstats.fail_msg
2020     if msg:
2021       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2022       retries += 1
2023       if retries >= 10:
2024         raise errors.RemoteError("Can't contact node %s for mirror data,"
2025                                  " aborting." % node)
2026       time.sleep(6)
2027       continue
2028     rstats = rstats.payload
2029     retries = 0
2030     for i, mstat in enumerate(rstats):
2031       if mstat is None:
2032         lu.LogWarning("Can't compute data for node %s/%s",
2033                            node, instance.disks[i].iv_name)
2034         continue
2035
2036       cumul_degraded = (cumul_degraded or
2037                         (mstat.is_degraded and mstat.sync_percent is None))
2038       if mstat.sync_percent is not None:
2039         done = False
2040         if mstat.estimated_time is not None:
2041           rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2042           max_time = mstat.estimated_time
2043         else:
2044           rem_time = "no time estimate"
2045         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2046                         (instance.disks[i].iv_name, mstat.sync_percent,
2047                          rem_time))
2048
2049     # if we're done but degraded, let's do a few small retries, to
2050     # make sure we see a stable and not transient situation; therefore
2051     # we force restart of the loop
2052     if (done or oneshot) and cumul_degraded and degr_retries > 0:
2053       logging.info("Degraded disks found, %d retries left", degr_retries)
2054       degr_retries -= 1
2055       time.sleep(1)
2056       continue
2057
2058     if done or oneshot:
2059       break
2060
2061     time.sleep(min(60, max_time))
2062
2063   if done:
2064     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2065   return not cumul_degraded
2066
2067
2068 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2069   """Check that mirrors are not degraded.
2070
2071   The ldisk parameter, if True, will change the test from the
2072   is_degraded attribute (which represents overall non-ok status for
2073   the device(s)) to the ldisk (representing the local storage status).
2074
2075   """
2076   lu.cfg.SetDiskID(dev, node)
2077
2078   result = True
2079
2080   if on_primary or dev.AssembleOnSecondary():
2081     rstats = lu.rpc.call_blockdev_find(node, dev)
2082     msg = rstats.fail_msg
2083     if msg:
2084       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2085       result = False
2086     elif not rstats.payload:
2087       lu.LogWarning("Can't find disk on node %s", node)
2088       result = False
2089     else:
2090       if ldisk:
2091         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2092       else:
2093         result = result and not rstats.payload.is_degraded
2094
2095   if dev.children:
2096     for child in dev.children:
2097       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2098
2099   return result
2100
2101
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")

  def ExpandNames(self):
    """Validate the arguments; no locks are requested.

    @raise errors.OpPrereqError: if specific OS names were passed

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported")

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  @staticmethod
  def _DiagnoseByOS(node_list, rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param node_list: a list with the names of all nodes (not used by the
        current implementation)
    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and tuples of (path, status, diagnose) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "")]}
          }

    """
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    responding = [nname for nname in rlist if not rlist[nname].fail_msg]

    by_os = {}
    for node_name, node_res in rlist.items():
      if node_res.fail_msg or not node_res.payload:
        continue
      for os_name, path, status, diagnose in node_res.payload:
        if os_name not in by_os:
          # start with an empty list for each responding node, so that
          # nodes lacking this OS show up with no entries
          by_os[os_name] = dict([(nname, []) for nname in responding])
        by_os[os_name][node_name].append((path, status, diagnose))
    return by_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    @param feedback_fn: feedback function (not used by this method)
    @rtype: list
    @return: one row per OS, each holding the values of the requested
        output fields in order

    """
    online_nodes = list(self.cfg.GetOnlineNodeList())
    node_data = self.rpc.call_os_diagnose(online_nodes)
    per_os = self._DiagnoseByOS(online_nodes, node_data)

    rows = []
    for os_name, os_data in per_os.items():
      row = []
      for field in self.op.output_fields:
        if field == "name":
          value = os_name
        elif field == "valid":
          # valid means: every node has the OS and its first entry is OK
          value = utils.all([osl and osl[0][1] for osl in os_data.values()])
        elif field == "node_status":
          # this is just a copy of the dict
          value = dict(os_data)
        else:
          raise errors.ParameterError(field)
        row.append(value)
      rows.append(row)

    return rows
2193
2194
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes);
        both node lists are all configured nodes except the one being
        removed

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    if self.op.node_name in all_nodes:
      all_nodes.remove(self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    On success C{self.op.node_name} holds the expanded node name and
    C{self.node} the node object.

    """
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
    if node is None:
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.")

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    @param feedback_fn: feedback function (not used by this method)

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except: # pylint: disable-msg=W0702
      # deliberately catch everything: the node is already gone from the
      # configuration at this point, so a hook failure must not abort
      # the rest of the removal
      self.LogWarning("Errors occurred running hooks on %s", node.name)

    result = self.rpc.call_node_leave_cluster(node.name)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self)
2274
2275
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  # fields which are read directly as attributes of the node object
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  # fields whose values come from the call_node_info RPC
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def ExpandNames(self):
    """Validate the output fields and compute the needed locks.

    Node locks (shared) are only requested when non-static fields were
    selected and the caller asked for locking.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    @param feedback_fn: feedback function (not used by this method)
    @rtype: list
    @return: one row per node (in NiceSort order of the names), each
        holding the values of the requested output fields in order
    @raise errors.OpExecError: if, when running without locks, a
        requested node is no longer in the configuration

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      # output field name -> key in the node info payload, for the
      # values which are converted to integers
      int_fields = (
        ("mtotal", 'memory_total'),
        ("mnode", 'memory_dom0'),
        ("mfree", 'memory_free'),
        ("dtotal", 'vg_size'),
        ("dfree", 'vg_free'),
        ("ctotal", 'cpu_total'),
        ("cnodes", 'cpu_nodes'),
        ("csockets", 'cpu_sockets'),
        )
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        reply = node_data[name]
        if reply.fail_msg or not reply.payload:
          # no data available for this node
          live_data[name] = {}
          continue
        payload = reply.payload
        node_live = dict([(fname, utils.TryConvert(int, payload.get(key, None)))
                          for fname, key in int_fields])
        node_live["bootid"] = payload.get('bootid', None)
        live_data[name] = node_live
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      # the instance data is only walked if an instance field was asked
      for instance_name in self.cfg.GetInstanceList():
        inst = self.cfg.GetInstanceInfo(instance_name)
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    rows = []
    for node in nodelist:
      row = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          value = getattr(node, field)
        elif field == "pinst_list":
          value = list(node_to_primary[node.name])
        elif field == "sinst_list":
          value = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          value = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          value = len(node_to_secondary[node.name])
        elif field == "pip":
          value = node.primary_ip
        elif field == "sip":
          value = node.secondary_ip
        elif field == "tags":
          value = list(node.GetTags())
        elif field == "master":
          value = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          value = live_data[node.name].get(field, None)
        elif field == "role":
          # the first matching state wins: master, candidate, drained,
          # offline, regular
          if node.name == master_node:
            value = "M"
          elif node.master_candidate:
            value = "C"
          elif node.drained:
            value = "D"
          elif node.offline:
            value = "O"
          else:
            value = "R"
        else:
          raise errors.ParameterError(field)
        row.append(value)
      rows.append(row)

    return rows
2436
2437
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    """Validate the output fields and request shared node locks.

    All nodes are locked when no node names were passed.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This only records the names of the nodes whose locks were acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    @param feedback_fn: feedback function (not used by this method)
    @rtype: list
    @return: one row per volume, each holding the string values of the
        requested output fields in order; offline nodes and nodes which
        failed to answer are skipped

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname)
             for iname in self.cfg.GetInstanceList()]

    # instance object -> result of its MapLVsByNode()
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    rows = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      err = nresult.fail_msg
      if err:
        self.LogWarning("Can't compute volume data on node %s: %s", node, err)
        continue

      # sort a copy, leaving the RPC payload untouched
      node_vols = sorted(nresult.payload, key=lambda vol: vol['dev'])

      for vol in node_vols:
        row = []
        for field in self.op.output_fields:
          if field == "node":
            value = node
          elif field == "phys":
            value = vol['dev']
          elif field == "vg":
            value = vol['vg']
          elif field == "name":
            value = vol['name']
          elif field == "size":
            value = int(float(vol['size']))
          elif field == "instance":
            # '-' is reported for volumes not belonging to any instance
            value = '-'
            for inst in ilist:
              inst_lvs = lv_by_node[inst]
              if node in inst_lvs and vol['name'] in inst_lvs[node]:
                value = inst.name
                break
          else:
            raise errors.ParameterError(field)
          row.append(str(value))

        rows.append(row)

    return rows
2522
2523
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    """Check storage type and output fields, declare shared node locks.

    The set of valid dynamic fields depends on the storage type and is
    taken from C{constants.VALID_STORAGE_FIELDS}.

    """
    stype = self.op.storage_type
    if stype not in constants.VALID_STORAGE_FIELDS:
      raise errors.OpPrereqError("Unknown storage type: %s" % stype)

    type_fields = constants.VALID_STORAGE_FIELDS[stype]
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*type_fields),
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      wanted = locking.ALL_SET
    else:
      wanted = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    This defaults the optional "name" opcode attribute to None and
    records the node names whose locks were acquired.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Query the storage units of the locked nodes.

    Nodes marked offline in the RPC result are skipped silently; nodes
    whose RPC call failed are skipped after logging a warning. Nodes
    and, within a node, storage units are ordered with
    C{utils.NiceSort}.

    @return: list of rows, each holding the values of the requested
        output fields in request order

    """
    requested = self.op.output_fields

    # The name field is always queried, as rows are keyed and sorted by it
    if constants.SF_NAME not in requested:
      fields = [constants.SF_NAME] + requested
    else:
      fields = requested[:]

    # "node" is only known to the LU, so it is never sent to the nodes
    fields = [fname for fname in fields if fname != "node"]

    position = dict((fname, pos) for (pos, fname) in enumerate(fields))
    name_pos = position[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    output = []

    for node in utils.NiceSort(self.nodes):
      node_res = data[node]
      if node_res.offline:
        continue

      err = node_res.fail_msg
      if err:
        self.LogWarning("Can't get storage data from node %s: %s", node, err)
        continue

      by_name = dict((entry[name_pos], entry) for entry in node_res.payload)

      for unit_name in utils.NiceSort(by_name.keys()):
        entry = by_name[unit_name]

        line = []
        for field in requested:
          if field == "node":
            line.append(node)
          elif field in position:
            line.append(entry[position[field]])
          else:
            raise errors.ParameterError(field)

        output.append(line)

    return output
2617
2618
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and check that the storage type is known.

    """
    expanded = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)

    self.op.node_name = expanded

    stype = self.op.storage_type
    if stype not in constants.VALID_STORAGE_FIELDS:
      raise errors.OpPrereqError("Unknown storage type: %s" % stype)

  def ExpandNames(self):
    """Declare the lock on the node holding the storage unit.

    """
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = self.op.node_name

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the storage type has modifiable fields at all and
    that every requested change refers to one of them.

    """
    stype = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[stype]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % stype)

    rejected = set(self.op.changes.keys()) - modifiable
    if rejected:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (stype, list(rejected)))

  def Exec(self, feedback_fn):
    """Apply the requested changes to the storage unit.

    A failure of the RPC call is turned into an exception via the
    result's C{Raise} method.

    """
    op = self.op
    st_args = _GetStorageTypeArgs(self.cfg, op.storage_type)
    result = self.rpc.call_storage_modify(op.node_name,
                                          op.storage_type, st_args,
                                          op.name, op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (op.name, op.node_name))
2670
2671
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The first node list returned is the current node list; the second
    one additionally contains the node being added.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    pre_nodes = self.cfg.GetNodeList()
    post_nodes = pre_nodes + [self.op.node_name]
    return env, pre_nodes, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config (or, for a re-add,
       that it is, with unchanged IP addresses)
     - it is resolvable
     - its IP addresses do not clash with those of another node
     - its parameters (single/dual homed) matches the cluster
     - it answers a TCP ping on the node daemon port

    It also decides whether the node becomes a master candidate and
    prepares the node object used by L{Exec}.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    readd = self.op.readd

    host_info = utils.HostInfo(self.op.node_name)

    node = host_info.name
    primary_ip = self.op.primary_ip = host_info.ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      # no secondary IP given, i.e. a single-homed node
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given")
    self.op.secondary_ip = secondary_ip

    known_nodes = cfg.GetNodeList()
    is_known = node in known_nodes
    if readd:
      if not is_known:
        raise errors.OpPrereqError("Node %s is not in the configuration" %
                                   node)
    elif is_known:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node)

    new_ips = (primary_ip, secondary_ip)
    for other_name in known_nodes:
      other = cfg.GetNodeInfo(other_name)

      if readd and node == other_name:
        # the node itself: its addresses must not have changed
        if (other.primary_ip, other.secondary_ip) != new_ips:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before")
        continue

      if other.primary_ip in new_ips or other.secondary_ip in new_ips:
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % other.name)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed and not newbie_singlehomed:
      raise errors.OpPrereqError("The master has no private ip but the"
                                 " new node has one")
    if newbie_singlehomed and not master_singlehomed:
      raise errors.OpPrereqError("The master has a private ip but the"
                                 " new node doesn't have one")

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping")

    # check reachability from my secondary ip to newbie's secondary ip
    if (not newbie_singlehomed and
        not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                          source=myself.secondary_ip)):
      raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                 " based ping to noded port")

    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
    if readd:
      exceptions = [node]
    else:
      exceptions = []
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
    # the new node will increase mc_max with one, so:
    mc_max = min(mc_max + 1, cp_size)
    self.master_candidate = mc_now < mc_max

    if readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    The steps are, in order: version check, transfer of the ssh keys,
    optional /etc/hosts update, secondary IP check, ssh/hostname
    verification from the master and finally the registration of the
    node (plus the redistribution of the ancillary files).

    """
    new_node = self.new_node
    node = new_node.name
    readd = self.op.readd

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if readd:
      new_node.drained = False
      new_node.offline = False
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    version_res = self.rpc.call_version([node])[node]
    version_res.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION != version_res.payload:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION,
                                version_res.payload))
    logging.info("Communication to node %s fine, sw version %s match",
                 node, version_res.payload)

    # setup ssh on node
    logging.info("Copy ssh key to node %s", node)
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                priv_key, pub_key]
    (dsa_priv, dsa_pub, rsa_priv, rsa_pub, user_priv, user_pub) = \
      [utils.ReadFile(path) for path in keyfiles]

    add_res = self.rpc.call_node_add(node, dsa_priv, dsa_pub,
                                     rsa_priv, rsa_pub,
                                     user_priv, user_pub)
    add_res.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      ip_res = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      ip_res.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True)
      if not ip_res.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    verifiers = [self.cfg.GetMasterNode()]
    verify_what = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    verify_res = self.rpc.call_node_verify(verifiers, verify_what,
                                           self.cfg.GetClusterName())
    for verifier in verifiers:
      verify_res[verifier].Raise("Cannot communicate with node %s" % verifier)
      failures = verify_res[verifier].payload[constants.NV_NODELIST]
      if not failures:
        continue
      for failed in failures:
        feedback_fn("ssh/hostname verification failed %s -> %s" %
                    (verifier, failures[failed]))
      raise errors.OpExecError("ssh/hostname verification failed.")

    if not readd:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node)
      return

    _RedistributeAncillaryFiles(self)
    self.context.ReaddNode(new_node)
    # make sure we redistribute the config
    self.cfg.Update(new_node)
    # and make sure the new node will not have old files around
    if not new_node.master_candidate:
      demote_res = self.rpc.call_node_demote_from_mc(new_node.name)
      msg = demote_res.fail_msg
      if msg:
        self.LogWarning("Node failed to demote itself from master"
                        " candidate status: %s" % msg)
2883
2884
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and sanity-check the requested flags.

    At least one of the offline/master_candidate/drained flags must be
    given, and at most one of them may be set to True.

    """
    expanded = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
    self.op.node_name = expanded
    for flag_name in ('master_candidate', 'offline', 'drained'):
      _CheckBooleanOpField(self.op, flag_name)
    flags = [self.op.offline, self.op.master_candidate, self.op.drained]
    if flags.count(None) == len(flags):
      raise errors.OpPrereqError("Please pass at least one modification")
    if flags.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time")

  def ExpandNames(self):
    """Declare the lock on the node to be modified.

    """
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = self.op.node_name

  def BuildHooksEnv(self):
    """Build hooks env.

    Both node lists returned contain the master node and the node
    being modified.

    """
    op = self.op
    env = {
      "OP_TARGET": op.node_name,
      "MASTER_CANDIDATE": str(op.master_candidate),
      "OFFLINE": str(op.offline),
      "DRAINED": str(op.drained),
      }
    hook_nodes = [self.cfg.GetMasterNode(), op.node_name]
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that:
     - the flags of the master node are not being changed
     - taking the node out of the master candidate pool leaves enough
       candidates (only a warning if the force flag is set)
     - a node which stays offline or drained is not promoted to
       master candidate

    """
    op = self.op
    node = self.node = self.cfg.GetNodeInfo(op.node_name)

    # we can't change the master's node flags
    no_flag_given = (op.master_candidate is None and
                     op.drained is None and
                     op.offline is None)
    if not no_flag_given and op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("The master role can be changed"
                                 " only via masterfailover")

    leaves_pool = (op.master_candidate == False or op.offline == True or
                   op.drained == True)
    if leaves_pool and node.master_candidate:
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
      if num_candidates <= cp_size:
        msg = ("Not enough master candidates (desired"
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
        if not op.force:
          raise errors.OpPrereqError(msg)
        self.LogWarning(msg)

    if op.master_candidate == True:
      # a flag only stops blocking the promotion if it is explicitly
      # being cleared by this very operation
      stays_offline = node.offline and not op.offline == False
      stays_drained = node.drained and not op.drained == False
      if stays_offline or stays_drained:
        raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                   " to master_candidate" % node.name)

  def _AskNodeToDemote(self, node_name):
    """Ask a node to demote itself from master candidate status.

    A failure of the RPC call is only logged as a warning.

    """
    msg = self.rpc.call_node_demote_from_mc(node_name).fail_msg
    if msg:
      self.LogWarning("Node failed to demote itself: %s" % msg)

  def Exec(self, feedback_fn):
    """Modifies a node.

    The flags are processed in the order offline, master_candidate,
    drained. Setting offline or drained to True also demotes a master
    candidate and clears the other one of the two flags.

    @return: list of (parameter, new value or explanation) pairs

    """
    op = self.op
    node = self.node

    changes = []
    mc_changed = False

    if op.offline is not None:
      node.offline = op.offline
      changes.append(("offline", str(op.offline)))
      if op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          mc_changed = True
          changes.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          changes.append(("drained", "clear drained status due to offline"))

    if op.master_candidate is not None:
      node.master_candidate = op.master_candidate
      mc_changed = True
      changes.append(("master_candidate", str(op.master_candidate)))
      if op.master_candidate == False:
        self._AskNodeToDemote(node.name)

    if op.drained is not None:
      node.drained = op.drained
      changes.append(("drained", str(op.drained)))
      if op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          mc_changed = True
          changes.append(("master_candidate", "auto-demotion due to drain"))
          self._AskNodeToDemote(node.name)
        if node.offline:
          node.offline = False
          changes.append(("offline", "clear offline status due to drain"))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node)
    # this will trigger job queue propagation or cleanup
    if mc_changed:
      self.context.ReaddNode(node)

    return changes
3018
3019
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name; the master needs the force parameter.

    """
    expanded = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
    self.op.node_name = expanded
    if expanded == self.cfg.GetMasterNode():
      if not self.op.force:
        raise errors.OpPrereqError("The node is the master and the force"
                                   " parameter was not set")

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = dict()

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the powercycle RPC call

    """
    hv_type = self.cfg.GetHypervisorType()
    outcome = self.rpc.call_node_powercycle(self.op.node_name, hv_type)
    outcome.Raise("Failed to schedule the reboot")
    return outcome.payload
3061
3062
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """No locks are needed for this LU.

    """
    self.needed_locks = dict()

  def CheckPrereq(self):
    """No prerequsites needed for this LU.

    """

  def Exec(self, feedback_fn):
    """Return cluster config.

    @return: dict with version constants, the architecture of the
        machine running this code and selected attributes of the
        cluster configuration object

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    # only the parameters of the enabled hypervisors are reported
    hvparams = dict((hv_name, cluster.hvparams[hv_name])
                    for hv_name in enabled_hvs)
    architecture = (platform.architecture()[0], platform.machine())

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": architecture,
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      }
3110
3111
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    """Declare that no locks are needed and check the output fields.

    """
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Collect the requested configuration values.

    @return: list with one value per requested output field, in the
        order of the request
    @raise errors.ParameterError: if an unknown field is requested

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        # must be collected like any other field; returning here would
        # drop the values gathered so far and break the list contract
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values
3153
3154
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """At the node level, lock the nodes of the instance.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node is online; the optional "ignore_size" opcode
    attribute defaults to False.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)
    self.op.ignore_size = getattr(self.op, "ignore_size", False)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by
        L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (disks_ok, disks_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if disks_ok:
      return disks_info

    raise errors.OpExecError("Cannot activate block devices")
3195
3196
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes: a first pass assembles
  every disk on all its nodes in secondary mode, a second pass
  assembles the disks on the primary node only, in primary mode.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors during the first pass
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a tuple of (disks_ok, device_info); disks_ok is False if
      any assembly that is not being ignored failed, device_info is
      a list with one (host, instance_visible_name, node_visible_name)
      entry per instance disk, with the mapping from node devices to
      instance devices; node_visible_name is None if the disk could
      not be assembled on the primary node

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    # Only a successful assembly on the primary node provides the device
    # path; tracking it per disk avoids reporting the payload of a
    # failed call or a stale result left over from another disk/pass.
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
3275
3276
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  The disks are assembled with L{_AssembleInstanceDisks}, passing
  C{force} as its C{ignore_secondaries} argument. If that fails, the
  disks are shut down again and an error is raised; when C{force} is
  false (but not None), a hint about '--force' is logged first.

  @raise errors.OpExecError: if the disks could not be assembled

  """
  assembled = _AssembleInstanceDisks(lu, instance, ignore_secondaries=force)
  if assembled[0]:
    return

  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
3290
3291
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """At the node level, lock the nodes of the instance.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    The actual work is done by L{_SafeShutdownInstanceDisks}.

    """
    _SafeShutdownInstanceDisks(self, self.instance)
3324
3325
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function asks the primary node for its list of running
  instances and only calls _ShutdownInstanceDisks if the given
  instance is not among them.

  @raise errors.OpExecError: if the instance is running

  """
  primary = instance.primary_node
  running = lu.rpc.call_instance_list([primary],
                                      [instance.hypervisor])[primary]
  running.Raise("Can't contact node %s" % primary)

  if instance.name not in running.payload:
    _ShutdownInstanceDisks(lu, instance)
    return

  raise errors.OpExecError("Instance is running, can't shutdown"
                           " block devices.")
3342
3343
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance, as returned by
  each disk's C{ComputeNodeTree}. Every failure is logged as a warning.

  If C{ignore_primary} is true, errors on the primary node are ignored
  (they are still logged, but do not influence the result). Errors on
  any other node always make the function report failure.

  @param lu: the logical unit on whose behalf we execute; its C{cfg},
      C{rpc} and C{LogWarning} members are used
  @param instance: the instance whose disks should be shut down
  @type ignore_primary: boolean
  @param ignore_primary: whether shutdown errors on the primary node
      should be disregarded when computing the result
  @rtype: boolean
  @return: False if any shutdown that is not being ignored failed,
      True otherwise

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if not msg:
        continue
      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, msg)
      # a failure counts unless it happened on the primary node and the
      # caller asked for primary node errors to be ignored
      if not ignore_primary or node != instance.primary_node:
        all_result = False
  return all_result
3365
3366
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node reports enough free memory.

  The node is queried via the C{call_node_info} RPC and the
  C{memory_free} entry of its answer is compared with the requested
  amount. If the node cannot be queried, the value is not an integer,
  or it is smaller than requested, an OpPrereqError is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  node_result = lu.rpc.call_node_info([node], vg_name, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node, prereq=True)

  available = node_result.payload.get('memory_free', None)
  if not isinstance(available, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, available))

  if requested > available:
    details = (node, reason, requested, available)
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" % details)
3399
3400
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (used for both returned lists) is the master node
    followed by all nodes of the instance.

    """
    hook_env = {"FORCE": self.op.force}
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.all_nodes)
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates the
    optional one-off beparams/hvparams, and verifies that the primary
    node is online, has the needed bridges and (if the instance is not
    already running there) enough free memory.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert self.instance is not None, (
      "Cannot retrieve locked instance %s" % self.op.instance_name)

    # extra beparams
    extra_be = getattr(self.op, "beparams", {})
    self.beparams = extra_be
    if extra_be:
      if not isinstance(extra_be, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(extra_be), ))
      # fill the beparams dict
      utils.ForceDictType(extra_be, constants.BES_PARAMETER_TYPES)
      self.op.beparams = extra_be

    # extra hvparams
    extra_hv = getattr(self.op, "hvparams", {})
    self.hvparams = extra_hv
    if extra_hv:
      if not isinstance(extra_hv, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(extra_hv), ))

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(extra_hv, constants.HVS_PARAMETER_TYPES)
      # cluster defaults, overridden by the instance's own parameters,
      # overridden in turn by the parameters passed for this start
      merged_hv = objects.FillDict(cluster.hvparams[inst.hypervisor],
                                   inst.hvparams)
      merged_hv.update(extra_hv)
      hv_class = hypervisor.GetHypervisor(inst.hypervisor)
      hv_class.CheckParameterSyntax(merged_hv)
      _CheckHVParams(self, inst.all_nodes, inst.hypervisor, merged_hv)
      self.op.hvparams = extra_hv

    _CheckNodeOnline(self, inst.primary_node)

    filled_be = self.cfg.GetClusterInfo().FillBE(inst)
    # check bridges existence
    _CheckInstanceBridgesExist(self, inst)

    live_info = self.rpc.call_instance_info(inst.primary_node,
                                            inst.name,
                                            inst.hypervisor)
    live_info.Raise("Error checking node %s" % inst.primary_node,
                    prereq=True)
    if live_info.payload:
      # already running, no memory check needed
      return
    _CheckNodeFreeMemory(self, inst.primary_node,
                         "starting instance %s" % inst.name,
                         filled_be[constants.BE_MEMORY], inst.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked as up in the configuration first; then its
    disks are started and the start RPC is sent to the primary node.
    If the start fails, the disks are shut down again.

    """
    inst = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(inst.name)

    target_node = inst.primary_node

    _StartInstanceDisks(self, inst, force)

    start_result = self.rpc.call_instance_start(target_node, inst,
                                                self.hvparams, self.beparams)
    failure = start_result.fail_msg
    if not failure:
      return

    _ShutdownInstanceDisks(self, inst)
    raise errors.OpExecError("Could not start instance: %s" % failure)
3499
3500
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def ExpandNames(self):
    """Validate the reboot type, then expand and lock the instance.

    """
    valid_types = (constants.INSTANCE_REBOOT_SOFT,
                   constants.INSTANCE_REBOOT_HARD,
                   constants.INSTANCE_REBOOT_FULL)
    if self.op.reboot_type not in valid_types:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  valid_types)
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (used for both returned lists) is the master node
    followed by all nodes of the instance.

    """
    hook_env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.all_nodes)
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that its primary
    node is online and that the instance's bridges exist.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert self.instance is not None, (
      "Cannot retrieve locked instance %s" % self.op.instance_name)

    _CheckNodeOnline(self, inst.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, inst)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    Soft and hard reboots are done through the reboot RPC on the
    primary node; any other (i.e. full) reboot is a shutdown of the
    instance, a restart of its disks and a fresh start. In all cases
    the instance is marked as up afterwards.

    """
    inst = self.instance
    skip_secondaries = self.op.ignore_secondaries
    kind = self.op.reboot_type

    pnode = inst.primary_node

    in_place_types = (constants.INSTANCE_REBOOT_SOFT,
                      constants.INSTANCE_REBOOT_HARD)
    if kind not in in_place_types:
      # full reboot: stop the instance, cycle its disks, start it again
      stop_result = self.rpc.call_instance_shutdown(pnode, inst)
      stop_result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, inst)
      _StartInstanceDisks(self, inst, skip_secondaries)
      start_result = self.rpc.call_instance_start(pnode, inst, None, None)
      start_err = start_result.fail_msg
      if start_err:
        _ShutdownInstanceDisks(self, inst)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % start_err)
    else:
      for disk in inst.disks:
        self.cfg.SetDiskID(disk, pnode)
      reboot_result = self.rpc.call_instance_reboot(pnode, inst, kind)
      reboot_result.Raise("Could not reboot instance")

    self.cfg.MarkInstanceUp(inst.name)
3579
3580
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (used for both returned lists) is the master node
    followed by all nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.all_nodes)
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node is online.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, (
      "Cannot retrieve locked instance %s" % self.op.instance_name)
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked as down in the configuration before the
    shutdown RPC is sent; a failed shutdown is only logged as a warning
    and the disks are shut down regardless.

    """
    inst = self.instance
    pnode = inst.primary_node
    self.cfg.MarkInstanceDown(inst.name)
    stop_result = self.rpc.call_instance_shutdown(pnode, inst)
    failure = stop_result.fail_msg
    if failure:
      self.proc.LogWarning("Could not shutdown instance: %s" % failure)

    _ShutdownInstanceDisks(self, inst)
3627
3628
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (used for both returned lists) is the master node
    followed by all nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    It also requires the instance to have disks and, if a new OS type
    was passed in the opcode, that the primary node accepts that OS.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))

    # os_type is optional in the opcode; normalize a missing value to None
    self.op.os_type = getattr(self.op, "os_type", None)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        # report the node we actually looked up; this opcode is not
        # required to carry a "pnode" attribute, so reading self.op.pnode
        # here could fail with AttributeError instead of this error
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   instance.primary_node)
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
                   (self.op.os_type, pnode.name), prereq=True)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    If a new OS type was requested it is stored in the configuration
    first. The disks are then started, the OS create scripts are run on
    the primary node and the disks are shut down again, whether or not
    the installation succeeded.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
3711
3712
class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The C{disks} parameter must be a list of non-negative integers.

    """
    if not isinstance(self.op.disks, list):
      raise errors.OpPrereqError("Invalid disks parameter")
    for disk_idx in self.op.disks:
      if not isinstance(disk_idx, int) or disk_idx < 0:
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(disk_idx))

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (used for both returned lists) is the master node
    followed by all nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.all_nodes)
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    and that all requested disk indices exist. An empty disk list
    selects all disks of the instance.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, (
      "Cannot retrieve locked instance %s" % self.op.instance_name)
    _CheckNodeOnline(self, inst.primary_node)

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if inst.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    live_info = self.rpc.call_instance_info(inst.primary_node,
                                            inst.name,
                                            inst.hypervisor)
    live_info.Raise("Error checking node %s" % inst.primary_node,
                    prereq=True)
    if live_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  inst.primary_node))

    if self.op.disks:
      for idx in self.op.disks:
        if idx >= len(inst.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx)
    else:
      self.op.disks = range(len(inst.disks))

    self.instance = inst

  def Exec(self, feedback_fn):
    """Recreate the disks.

    Every disk index that was not requested is passed to
    L{_CreateDisks} as one to skip.

    """
    to_skip = [idx for idx, _ in enumerate(self.instance.disks)
               if idx not in self.op.disks]

    _CreateDisks(self, self.instance, to_skip=to_skip)
3794
3795
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (used for both returned lists) is the master node
    followed by all nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["INSTANCE_NEW_NAME"] = self.op.new_name
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.all_nodes)
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    that the new name is not already used by another instance and,
    unless C{ignore_ip} is set in the opcode, that the IP of the new
    name does not answer on the node daemon port.

    """
    full_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    inst = self.cfg.GetInstanceInfo(full_name)
    if inst is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    _CheckNodeOnline(self, inst.primary_node)

    if inst.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    live_info = self.rpc.call_instance_info(inst.primary_node,
                                            inst.name,
                                            inst.hypervisor)
    live_info.Raise("Error checking node %s" % inst.primary_node,
                    prereq=True)
    if live_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  inst.primary_node))
    self.instance = inst

    # new name verification
    host = utils.HostInfo(self.op.new_name)

    target_name = host.name
    self.op.new_name = target_name
    if target_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 target_name)

    if (not getattr(self.op, "ignore_ip", False) and
        utils.TcpPing(host.ip, constants.DEFAULT_NODED_PORT)):
      raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                 (host.ip, target_name))

  def Exec(self, feedback_fn):
    """Rename the instance.

    The instance is renamed in the configuration and its lock is
    replaced; for file-based instances the storage directory is renamed
    on the primary node. Finally the OS rename script is run with the
    disks started; a failure of that script is only logged as a warning.

    """
    inst = self.instance
    previous_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    lock_manager = self.context.glm
    lock_manager.remove(locking.LEVEL_INSTANCE, previous_name)
    lock_manager.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      dir_result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                         old_storage_dir,
                                                         new_storage_dir)
      dir_result.Raise("Could not rename on node %s directory '%s' to '%s'"
                       " (but the instance has been renamed in Ganeti)" %
                       (inst.primary_node, old_storage_dir,
                        new_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      script_result = self.rpc.call_instance_run_rename(inst.primary_node,
                                                        inst, previous_name)
      failure = script_result.fail_msg
      if failure:
        warning = ("Could not run OS rename script for instance %s on node %s"
                   " (but the instance has been renamed in Ganeti): %s" %
                   (inst.name, inst.primary_node, failure))
        self.proc.LogWarning(warning)
    finally:
      _ShutdownInstanceDisks(self, inst)
3897
3898
class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; node locks are filled in by L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    # no node locks are named yet; they are marked for recalculation
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node-level locks of the locked instance.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (used for both returned lists) contains only the
    master node.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    node_list = [self.cfg.GetMasterNode()]
    return hook_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, (
      "Cannot retrieve locked instance %s" % self.op.instance_name)

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down, its disks are removed and it is dropped
    from the configuration. Failures of the first two steps abort the
    removal unless C{ignore_failures} is set in the opcode, in which
    case they are only reported through C{feedback_fn}.

    """
    inst = self.instance
    logging.info("Shutting down instance %s on node %s",
                 inst.name, inst.primary_node)

    stop_result = self.rpc.call_instance_shutdown(inst.primary_node, inst)
    failure = stop_result.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    logging.info("Removing block devices for instance %s", inst.name)

    disks_removed = _RemoveDisks(self, inst)
    if not disks_removed:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Can't remove instance's disks")
      feedback_fn("Warning: can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", inst.name)

    self.cfg.RemoveInstance(inst.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = inst.name
3967
3968
3969 class LUQueryInstances(NoHooksLU):
3970   """Logical unit for querying instances.
3971
3972   """
3973   _OP_REQP = ["output_fields", "names", "use_locking"]
3974   REQ_BGL = False
3975   _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
3976                     "serial_no", "ctime", "mtime", "uuid"]
3977   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
3978                                     "admin_state",
3979                                     "disk_template", "ip", "mac", "bridge",
3980                                     "nic_mode", "nic_link",
3981                                     "sda_size", "sdb_size", "vcpus", "tags",
3982                                     "network_port", "beparams",
3983                                     r"(disk)\.(size)/([0-9]+)",
3984                                     r"(disk)\.(sizes)", "disk_usage",
3985                                     r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
3986                                     r"(nic)\.(bridge)/([0-9]+)",
3987                                     r"(nic)\.(macs|ips|modes|links|bridges)",
3988                                     r"(disk|nic)\.(count)",
3989                                     "hvparams",
3990                                     ] + _SIMPLE_FIELDS +
3991                                   ["hv/%s" % name
3992                                    for name in constants.HVS_PARAMETERS] +
3993                                   ["be/%s" % name
3994                                    for name in constants.BES_PARAMETERS])
3995   _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
3996
3997
3998   def ExpandNames(self):
3999     _CheckOutputFields(static=self._FIELDS_STATIC,
4000                        dynamic=self._FIELDS_DYNAMIC,
4001                        selected=self.op.output_fields)
4002
4003     self.needed_locks = {}
4004     self.share_locks[locking.LEVEL_INSTANCE] = 1
4005     self.share_locks[locking.LEVEL_NODE] = 1
4006
4007     if self.op.names:
4008       self.wanted = _GetWantedInstances(self, self.op.names)
4009     else:
4010       self.wanted = locking.ALL_SET
4011
4012     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4013     self.do_locking = self.do_node_query and self.op.use_locking
4014     if self.do_locking:
4015       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4016       self.needed_locks[locking.LEVEL_NODE] = []
4017       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4018
4019   def DeclareLocks(self, level):
4020     if level == locking.LEVEL_NODE and self.do_locking:
4021       self._LockInstancesNodes()
4022
4023   def CheckPrereq(self):
4024     """Check prerequisites.
4025
4026     """
4027     pass
4028
4029   def Exec(self, feedback_fn):
4030     """Computes the list of nodes and their attributes.
4031
4032     """
4033     all_info = self.cfg.GetAllInstancesInfo()
4034     if self.wanted == locking.ALL_SET:
4035       # caller didn't specify instance names, so ordering is not important
4036       if self.do_locking:
4037         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4038       else:
4039         instance_names = all_info.keys()
4040       instance_names = utils.NiceSort(instance_names)
4041     else:
4042       # caller did specify names, so we must keep the ordering
4043       if self.do_locking:
4044         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4045       else:
4046         tgt_set = all_info.keys()
4047       missing = set(self.wanted).difference(tgt_set)
4048       if missing:
4049         raise errors.OpExecError("Some instances were removed before"
4050                                  " retrieving their data: %s" % missing)
4051       instance_names = self.wanted
4052
4053     instance_list = [all_info[iname] for iname in instance_names]
4054
4055     # begin data gathering
4056
4057     nodes = frozenset([inst.primary_node for inst in instance_list])
4058     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4059
4060     bad_nodes = []
4061     off_nodes = []
4062     if self.do_node_query:
4063       live_data = {}
4064       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4065       for name in nodes:
4066         result = node_data[name]
4067         if result.offline:
4068           # offline nodes will be in both lists
4069           off_nodes.append(name)
4070         if result.fail_msg:
4071           bad_nodes.append(name)
4072         else:
4073           if result.payload:
4074             live_data.update(result.payload)
4075           # else no instance is alive
4076     else:
4077       live_data = dict([(name, {}) for name in instance_names])
4078
4079     # end data gathering
4080
4081     HVPREFIX = "hv/"
4082     BEPREFIX = "be/"
4083     output = []
4084     cluster = self.cfg.GetClusterInfo()
4085     for instance in instance_list:
4086       iout = []
4087       i_hv = cluster.FillHV(instance)
4088       i_be = cluster.FillBE(instance)
4089       i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4090                                  nic.nicparams) for nic in instance.nics]
4091       for field in self.op.output_fields:
4092         st_match = self._FIELDS_STATIC.Matches(field)
4093         if field in self._SIMPLE_FIELDS:
4094           val = getattr(instance, field)
4095         elif field == "pnode":
4096           val = instance.primary_node
4097         elif field == "snodes":
4098           val = list(instance.secondary_nodes)
4099         elif field == "admin_state":
4100           val = instance.admin_up
4101         elif field == "oper_state":
4102           if instance.primary_node in bad_nodes:
4103             val = None
4104           else:
4105             val = bool(live_data.get(instance.name))
4106         elif field == "status":
4107           if instance.primary_node in off_nodes:
4108             val = "ERROR_nodeoffline"
4109           elif instance.primary_node in bad_nodes:
4110             val = "ERROR_nodedown"
4111           else:
4112             running = bool(live_data.get(instance.name))
4113             if running:
4114               if instance.admin_up:
4115                 val = "running"
4116               else:
4117                 val = "ERROR_up"
4118             else:
4119               if instance.admin_up:
4120                 val = "ERROR_down"
4121               else:
4122                 val = "ADMIN_down"
4123         elif field == "oper_ram":
4124           if instance.primary_node in bad_nodes:
4125             val = None
4126           elif instance.name in live_data:
4127             val = live_data[instance.name].get("memory", "?")
4128           else:
4129             val = "-"
4130         elif field == "vcpus":
4131           val = i_be[constants.BE_VCPUS]
4132         elif field == "disk_template":
4133           val = instance.disk_template
4134         elif field == "ip":
4135           if instance.nics:
4136             val = instance.nics[0].ip
4137           else:
4138             val = None
4139         elif field == "nic_mode":
4140           if instance.nics:
4141             val = i_nicp[0][constants.NIC_MODE]
4142           else:
4143             val = None
4144         elif field == "nic_link":
4145           if instance.nics:
4146             val = i_nicp[0][constants.NIC_LINK]
4147           else:
4148             val = None
4149         elif field == "bridge":
4150           if (instance.nics and
4151               i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4152             val = i_nicp[0][constants.NIC_LINK]
4153           else:
4154             val = None
4155         elif field == "mac":
4156           if instance.nics:
4157             val = instance.nics[0].mac
4158           else:
4159             val = None
4160         elif field == "sda_size" or field == "sdb_size":
4161           idx = ord(field[2]) - ord('a')
4162           try:
4163             val = instance.FindDisk(idx).size
4164           except errors.OpPrereqError:
4165             val = None
4166         elif field == "disk_usage": # total disk usage per node
4167           disk_sizes = [{'size': disk.size} for disk in instance.disks]
4168           val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4169         elif field == "tags":
4170           val = list(instance.GetTags())
4171         elif field == "hvparams":
4172           val = i_hv
4173         elif (field.startswith(HVPREFIX) and
4174               field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
4175           val = i_hv.get(field[len(HVPREFIX):], None)
4176         elif field == "beparams":
4177           val = i_be
4178         elif (field.startswith(BEPREFIX) and
4179               field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4180           val = i_be.get(field[len(BEPREFIX):], None)
4181         elif st_match and st_match.groups():
4182           # matches a variable list
4183           st_groups = st_match.groups()
4184           if st_groups and st_groups[0] == "disk":
4185             if st_groups[1] == "count":
4186               val = len(instance.disks)
4187             elif st_groups[1] == "sizes":
4188               val = [disk.size for disk in instance.disks]
4189             elif st_groups[1] == "size":
4190               try:
4191                 val = instance.FindDisk(st_groups[2]).size
4192               except errors.OpPrereqError:
4193                 val = None
4194             else:
4195               assert False, "Unhandled disk parameter"
4196           elif st_groups[0] == "nic":
4197             if st_groups[1] == "count":
4198               val = len(instance.nics)
4199             elif st_groups[1] == "macs":
4200               val = [nic.mac for nic in instance.nics]
4201             elif st_groups[1] == "ips":
4202               val = [nic.ip for nic in instance.nics]
4203             elif st_groups[1] == "modes":
4204               val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4205             elif st_groups[1] == "links":
4206               val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4207             elif st_groups[1] == "bridges":
4208               val = []
4209               for nicp in i_nicp:
4210                 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4211                   val.append(nicp[constants.NIC_LINK])
4212                 else:
4213                   val.append(None)
4214             else:
4215               # index-based item
4216               nic_idx = int(st_groups[2])
4217               if nic_idx >= len(instance.nics):
4218                 val = None
4219               else:
4220                 if st_groups[1] == "mac":
4221                   val = instance.nics[nic_idx].mac
4222                 elif st_groups[1] == "ip":
4223                   val = instance.nics[nic_idx].ip
4224                 elif st_groups[1] == "mode":
4225                   val = i_nicp[nic_idx][constants.NIC_MODE]
4226                 elif st_groups[1] == "link":
4227                   val = i_nicp[nic_idx][constants.NIC_LINK]
4228                 elif st_groups[1] == "bridge":
4229                   nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4230                   if nic_mode == constants.NIC_MODE_BRIDGED:
4231                     val = i_nicp[nic_idx][constants.NIC_LINK]
4232                   else:
4233                     val = None
4234                 else:
4235                   assert False, "Unhandled NIC parameter"
4236           else:
4237             assert False, ("Declared but unhandled variable parameter '%s'" %
4238                            field)
4239         else:
4240           assert False, "Declared but unhandled parameter '%s'" % field
4241         iout.append(val)
4242       output.append(iout)
4243
4244     return output
4245
4246
class LUFailoverInstance(LogicalUnit):
  """Fail an instance over to its first secondary node.

  The instance is shut down on its current primary node, its disks are
  deactivated, the configuration is updated so that the first secondary
  node becomes the primary and, if the instance is marked as up, its
  disks are assembled and it is started on that node.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the instance lock and defer the node locks.

    The node lock list starts out empty and is marked for replacement;
    it is filled in by L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    @param level: the locking level being declared

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list is the master node followed by the instance's
    secondary nodes; the same list is returned twice.

    @return: tuple of (environment dict, node list, node list)

    """
    env = {"IGNORE_CONSISTENCY": self.op.ignore_consistency}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.secondary_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must use a network-mirrored disk template and have a
    secondary node. That node must be online and not drained, must pass
    the bridge check and, when the instance is marked as up, must pass
    the free memory check.

    @raise errors.OpPrereqError: if the disk template is not
        network mirrored
    @raise errors.ProgrammerError: if the instance has no secondary node

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.")

    secondaries = instance.secondary_nodes
    if not secondaries:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    # the failover target is always the first secondary
    target_node = secondaries[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if not instance.admin_up:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # the instance will be started, so the target needs enough memory
      reason = "failing over instance %s" % instance.name
      _CheckNodeFreeMemory(self, target_node, reason,
                           bep[constants.BE_MEMORY], instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    @param feedback_fn: callable used to report progress messages
    @raise errors.OpExecError: on degraded disks (unless ignored or the
        instance is marked down), or when the shutdown (unless
        ignore_consistency is set), the disk deactivation, the disk
        activation or the instance start fails

    """
    instance = self.instance
    old_primary = instance.primary_node
    new_primary = instance.secondary_nodes[0]

    feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      # for drbd, these are drbd over lvm
      if _CheckDiskConsistency(self, dev, new_primary, False):
        continue
      # a degraded disk only aborts when the instance should be running
      # and the user did not ask to ignore consistency
      if instance.admin_up and not self.op.ignore_consistency:
        raise errors.OpExecError("Disk %s is degraded on target node,"
                                 " aborting failover." % dev.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, old_primary)

    shutdown_res = self.rpc.call_instance_shutdown(old_primary, instance)
    msg = shutdown_res.fail_msg
    if msg:
      if not self.op.ignore_consistency:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, old_primary, msg))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           instance.name, old_primary, old_primary, msg)

    feedback_fn("* deactivating the instance's disks on source node")
    disks_down = _ShutdownInstanceDisks(self, instance, ignore_primary=True)
    if not disks_down:
      raise errors.OpExecError("Can't shut down the instance's disks.")

    # switch the primary node and save the changed instance in the config
    instance.primary_node = new_primary
    self.cfg.Update(instance)

    # Only start the instance if it's marked as up
    if not instance.admin_up:
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s",
                 instance.name, new_primary)

    disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                         ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    start_res = self.rpc.call_instance_start(new_primary, instance,
                                             None, None)
    msg = start_res.fail_msg
    if msg:
      # do not leave the disks active if the instance could not start
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (instance.name, new_primary, msg))
4377
4378
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  Unlike a failover, which shuts the instance down first, a migration
  is done without shutting the instance down. The work is delegated
  to a single L{TLMigrateInstance} tasklet created in L{ExpandNames}.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks and create the migration tasklet.

    The node lock list starts out empty and is marked for replacement;
    it is filled in by L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 self.op.live, self.op.cleanup)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    @param level: the locking level being declared

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The instance object is taken from the tasklet. The node list is
    the master node followed by the instance's secondary nodes; the
    same list is returned twice.

    @return: tuple of (environment dict, node list, node list)

    """
    instance = self._migrater.instance
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(instance.secondary_nodes)
    return env, hook_nodes, hook_nodes
4418
4419
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its primary node, new disks are created
  on the target node and filled by exporting the data from the source
  node, the configuration is switched to the target node and the disks
  on the source node are removed. Only instances whose disks are all
  of type C{LD_LV} or C{LD_FILE} are accepted.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the target node name and declare the needed locks.

    @raise errors.OpPrereqError: if the target node name cannot be
        expanded

    """
    self._ExpandAndLockInstance()
    target_node = self.cfg.ExpandNodeName(self.op.target_node)
    if target_node is None:
      raise errors.OpPrereqError("Node '%s' not known" %
                                  self.op.target_node)
    self.op.target_node = target_node
    # the target node is known now; the instance's primary node is
    # appended later, in DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the instance's primary node to the node locks.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list is the master node, the instance's primary node and
    the target node; the same list is returned twice.

    @return: tuple of (environment dict, node list, node list)

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    The target node must differ from the current primary node, every
    disk must be of type C{LD_LV} or C{LD_FILE}, and the target node
    must be online and not drained and must pass the bridge check. If
    the instance is marked as up, the target node must also pass the
    free memory check.

    @raise errors.OpPrereqError: if the instance is already on the
        target node or one of its disks has an unsupported type

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node))

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        # the disk index must be interpolated, otherwise the user sees
        # a literal "%d" in the message
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      # NOTE(review): the reason text below still says "failing over"
      # although this is a move; left unchanged here
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @param feedback_fn: progress callback (not used by this method)
    @raise errors.OpExecError: if the shutdown fails (unless the opcode
        carries a true C{ignore_consistency}), or if the disk creation,
        the data copy, the disk activation or the instance start fails

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance)
    msg = result.fail_msg
    if msg:
      # ignore_consistency is not listed in _OP_REQP, so the opcode may
      # not carry it; default to the strict behaviour in that case
      if getattr(self.op, "ignore_consistency", False):
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        # propagate the failure after the cleanup attempt
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        # stop at the first failure, the whole move is aborted below
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        # raised from the finally clause, so the copy error is reported
        # even if the cleanup above failed
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # the data is on the target node now, make it the primary
    instance.primary_node = target_node
    self.cfg.Update(instance)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        # do not leave the disks active if the instance could not start
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
4593
4594
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  One L{TLMigrateInstance} tasklet (with cleanup disabled) is created
  for every instance returned by C{_GetNodePrimaryInstances} for the
  node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node name, create the tasklets and declare the locks.

    @raise errors.OpPrereqError: if the node name cannot be expanded

    """
    # keep the user-supplied name until the expansion has succeeded, so
    # that the error message can show it instead of "None"
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
    self.op.node_name = node_name

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    """Add the nodes of the locked instances to the node locks.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list contains only the master node; the same list is
    returned twice.

    @return: tuple of (environment dict, node list, node list)

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)
4647
4648
4649 class TLMigrateInstance(Tasklet):
4650   def __init__(self, lu, instance_name, live, cleanup):
4651     """Initializes this class.
4652
4653     """
4654     Tasklet.__init__(self, lu)
4655
4656     # Parameters
4657     self.instance_name = instance_name
4658     self.live = live
4659     self.cleanup = cleanup
4660
4661   def CheckPrereq(self):
4662     """Check prerequisites.
4663
4664     This checks that the instance is in the cluster.
4665
4666     """
4667     instance = self.cfg.GetInstanceInfo(
4668       self.cfg.ExpandInstanceName(self.instance_name))
4669     if instance is None:
4670       raise errors.OpPrereqError("Instance '%s' not known" %
4671                                  self.instance_name)
4672
4673     if instance.disk_template != constants.DT_DRBD8:
4674       raise errors.OpPrereqError("Instance's disk layout is not"
4675                                  " drbd8, cannot migrate.")
4676
4677     secondary_nodes = instance.secondary_nodes
4678     if not secondary_nodes:
4679       raise errors.ConfigurationError("No secondary node but using"
4680                                       " drbd8 disk template")
4681
4682     i_be = self.cfg.GetClusterInfo().FillBE(instance)
4683
4684     target_node = secondary_nodes[0]
4685     # check memory requirements on the secondary node
4686     _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
4687                          instance.name, i_be[constants.BE_MEMORY],
4688                          instance.hypervisor)
4689
4690     # check bridge existance
4691     _CheckInstanceBridgesExist(self, instance, node=target_node)
4692
4693     if not self.cleanup:
4694       _CheckNodeNotDrained(self, target_node)
4695       result = self.rpc.call_instance_migratable(instance.primary_node,
4696                                                  instance)
4697       result.Raise("Can't migrate, please use failover", prereq=True)
4698
4699     self.instance = instance
4700
4701   def _WaitUntilSync(self):
4702     """Poll with custom rpc for disk sync.
4703
4704     This uses our own step-based rpc call.
4705
4706     """
4707     self.feedback_fn("* wait until resync is done")
4708     all_done = False
4709     while not all_done:
4710       all_done = True
4711       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
4712                                             self.nodes_ip,
4713                                             self.instance.disks)
4714       min_percent = 100
4715       for node, nres in result.items():
4716         nres.Raise("Cannot resync disks on node %s" % node)
4717         node_done, node_percent = nres.payload
4718         all_done = all_done and node_done
4719         if node_percent is not None:
4720           min_percent = min(min_percent, node_percent)
4721       if not all_done:
4722         if min_percent < 100:
4723           self.feedback_fn("   - progress: %.1f%%" % min_percent)
4724         time.sleep(2)
4725
4726   def _EnsureSecondary(self, node):
4727     """Demote a node to secondary.
4728
4729     """
4730     self.feedback_fn("* switching node %s to secondary mode" % node)
4731
4732     for dev in self.instance.disks:
4733       self.cfg.SetDiskID(dev, node)
4734
4735     result = self.rpc.call_blockdev_close(node, self.instance.name,
4736                                           self.instance.disks)
4737     result.Raise("Cannot change disk to secondary on node %s" % node)
4738
4739   def _GoStandalone(self):
4740     """Disconnect from the network.
4741
4742     """
4743     self.feedback_fn("* changing into standalone mode")
4744     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
4745                                                self.instance.disks)
4746     for node, nres in result.items():
4747       nres.Raise("Cannot disconnect disks node %s" % node)
4748
4749   def _GoReconnect(self, multimaster):
4750     """Reconnect to the network.
4751
4752     """
4753     if multimaster:
4754       msg = "dual-master"
4755     else:
4756       msg = "single-master"
4757     self.feedback_fn("* changing disks into %s mode" % msg)
4758     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
4759                                            self.instance.disks,
4760                                            self.instance.name, multimaster)
4761     for node, nres in result.items():
4762       nres.Raise("Cannot change disks config on node %s" % node)
4763
4764   def _ExecCleanup(self):
4765     """Try to cleanup after a failed migration.
4766
4767     The cleanup is done by:
4768       - check that the instance is running only on one node
4769         (and update the config if needed)
4770       - change disks on its secondary node to secondary
4771       - wait until disks are fully synchronized
4772       - disconnect from the network
4773       - change disks into single-master mode
4774       - wait again until disks are fully synchronized
4775
4776     """
4777     instance = self.instance
4778     target_node = self.target_node
4779     source_node = self.source_node
4780
4781     # check running on only one node
4782     self.feedback_fn("* checking where the instance actually runs"
4783                      " (if this hangs, the hypervisor might be in"
4784                      " a bad state)")
4785     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
4786     for node, result in ins_l.items():
4787       result.Raise("Can't contact node %s" % node)
4788
4789     runningon_source = instance.name in ins_l[source_node].payload
4790     runningon_target = instance.name in ins_l[target_node].payload
4791
4792     if runningon_source and runningon_target:
4793       raise errors.OpExecError("Instance seems to be running on two nodes,"
4794                                " or the hypervisor is confused. You will have"
4795                                " to ensure manually that it runs only on one"
4796                                " and restart this operation.")
4797
4798     if not (runningon_source or runningon_target):
4799       raise errors.OpExecError("Instance does not seem to be running at all."
4800                                " In this case, it's safer to repair by"
4801                                " running 'gnt-instance stop' to ensure disk"
4802                                " shutdown, and then restarting it.")
4803
4804     if runningon_target:
4805       # the migration has actually succeeded, we need to update the config
4806       self.feedback_fn("* instance running on secondary node (%s),"
4807                        " updating config" % target_node)
4808       instance.primary_node = target_node
4809       self.cfg.Update(instance)
4810       demoted_node = source_node
4811     else:
4812       self.feedback_fn("* instance confirmed to be running on its"
4813                        " primary node (%s)" % source_node)
4814       demoted_node = target_node
4815
4816     self._EnsureSecondary(demoted_node)
4817     try:
4818       self._WaitUntilSync()
4819     except errors.OpExecError:
4820       # we ignore here errors, since if the device is standalone, it
4821       # won't be able to sync
4822       pass
4823     self._GoStandalone()
4824     self._GoReconnect(False)
4825     self._WaitUntilSync()
4826
4827     self.feedback_fn("* done")
4828
4829   def _RevertDiskStatus(self):
4830     """Try to revert the disk status after a failed migration.
4831
4832     """
4833     target_node = self.target_node
4834     try:
4835       self._EnsureSecondary(target_node)
4836       self._GoStandalone()
4837       self._GoReconnect(False)
4838       self._WaitUntilSync()
4839     except errors.OpExecError, err:
4840       self.lu.LogWarning("Migration failed and I can't reconnect the"
4841                          " drives: error '%s'\n"
4842                          "Please look and recover the instance status" %
4843                          str(err))
4844
4845   def _AbortMigration(self):
4846     """Call the hypervisor code to abort a started migration.
4847
4848     """
4849     instance = self.instance
4850     target_node = self.target_node
4851     migration_info = self.migration_info
4852
4853     abort_result = self.rpc.call_finalize_migration(target_node,
4854                                                     instance,
4855                                                     migration_info,
4856                                                     False)
4857     abort_msg = abort_result.fail_msg
4858     if abort_msg:
4859       logging.error("Aborting migration failed on target node %s: %s" %
4860                     (target_node, abort_msg))
4861       # Don't raise an exception here, as we stil have to try to revert the
4862       # disk status, even if this step failed.
4863
4864   def _ExecMigration(self):
4865     """Migrate an instance.
4866
4867     The migrate is done by:
4868       - change the disks into dual-master mode
4869       - wait until disks are fully synchronized again
4870       - migrate the instance
4871       - change disks on the new secondary node (the old primary) to secondary
4872       - wait until disks are fully synchronized
4873       - change disks into single-master mode
4874
4875     """
4876     instance = self.instance
4877     target_node = self.target_node
4878     source_node = self.source_node
4879
4880     self.feedback_fn("* checking disk consistency between source and target")
4881     for dev in instance.disks:
4882       if not _CheckDiskConsistency(self, dev, target_node, False):
4883         raise errors.OpExecError("Disk %s is degraded or not fully"
4884                                  " synchronized on target node,"
4885                                  " aborting migrate." % dev.iv_name)
4886
4887     # First get the migration information from the remote node
4888     result = self.rpc.call_migration_info(source_node, instance)
4889     msg = result.fail_msg
4890     if msg:
4891       log_err = ("Failed fetching source migration information from %s: %s" %
4892                  (source_node, msg))
4893       logging.error(log_err)
4894       raise errors.OpExecError(log_err)
4895
4896     self.migration_info = migration_info = result.payload
4897
4898     # Then switch the disks to master/master mode
4899     self._EnsureSecondary(target_node)
4900     self._GoStandalone()
4901     self._GoReconnect(True)
4902     self._WaitUntilSync()
4903
4904     self.feedback_fn("* preparing %s to accept the instance" % target_node)
4905     result = self.rpc.call_accept_instance(target_node,
4906                                            instance,
4907                                            migration_info,
4908                                            self.nodes_ip[target_node])
4909
4910     msg = result.fail_msg
4911     if msg:
4912       logging.error("Instance pre-migration failed, trying to revert"
4913                     " disk status: %s", msg)
4914       self._AbortMigration()
4915       self._RevertDiskStatus()
4916       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
4917                                (instance.name, msg))
4918
4919     self.feedback_fn("* migrating instance to %s" % target_node)
4920     time.sleep(10)
4921     result = self.rpc.call_instance_migrate(source_node, instance,
4922                                             self.nodes_ip[target_node],
4923                                             self.live)
4924     msg = result.fail_msg
4925     if msg:
4926       logging.error("Instance migration failed, trying to revert"
4927                     " disk status: %s", msg)
4928       self._AbortMigration()
4929       self._RevertDiskStatus()
4930       raise errors.OpExecError("Could not migrate instance %s: %s" %
4931                                (instance.name, msg))
4932     time.sleep(10)
4933
4934     instance.primary_node = target_node
4935     # distribute new instance config to the other nodes
4936     self.cfg.Update(instance)
4937
4938     result = self.rpc.call_finalize_migration(target_node,
4939                                               instance,
4940                                               migration_info,
4941                                               True)
4942     msg = result.fail_msg
4943     if msg:
4944       logging.error("Instance migration succeeded, but finalization failed:"
4945                     " %s" % msg)
4946       raise errors.OpExecError("Could not finalize instance migration: %s" %
4947                                msg)
4948
4949     self._EnsureSecondary(source_node)
4950     self._WaitUntilSync()
4951     self._GoStandalone()
4952     self._GoReconnect(False)
4953     self._WaitUntilSync()
4954
4955     self.feedback_fn("* done")
4956
4957   def Exec(self, feedback_fn):
4958     """Perform the migration.
4959
4960     """
4961     feedback_fn("Migrating instance %s" % self.instance.name)
4962
4963     self.feedback_fn = feedback_fn
4964
4965     self.source_node = self.instance.primary_node
4966     self.target_node = self.instance.secondary_nodes[0]
4967     self.all_nodes = [self.source_node, self.target_node]
4968     self.nodes_ip = {
4969       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
4970       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
4971       }
4972
4973     if self.cleanup:
4974       return self._ExecCleanup()
4975     else:
4976       return self._ExecMigration()
4977
4978
4979 def _CreateBlockDev(lu, node, instance, device, force_create,
4980                     info, force_open):
4981   """Create a tree of block devices on a given node.
4982
4983   If this device type has to be created on secondaries, create it and
4984   all its children.
4985
4986   If not, just recurse to children keeping the same 'force' value.
4987
4988   @param lu: the lu on whose behalf we execute
4989   @param node: the node on which to create the device
4990   @type instance: L{objects.Instance}
4991   @param instance: the instance which owns the device
4992   @type device: L{objects.Disk}
4993   @param device: the device to create
4994   @type force_create: boolean
4995   @param force_create: whether to force creation of this device; this
4996       will be change to True whenever we find a device which has
4997       CreateOnSecondary() attribute
4998   @param info: the extra 'metadata' we should attach to the device
4999       (this will be represented as a LVM tag)
5000   @type force_open: boolean
5001   @param force_open: this parameter will be passes to the
5002       L{backend.BlockdevCreate} function where it specifies
5003       whether we run on primary or not, and it affects both
5004       the child assembly and the device own Open() execution
5005
5006   """
5007   if device.CreateOnSecondary():
5008     force_create = True
5009
5010   if device.children:
5011     for child in device.children:
5012       _CreateBlockDev(lu, node, instance, child, force_create,
5013                       info, force_open)
5014
5015   if not force_create:
5016     return
5017
5018   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5019
5020
5021 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5022   """Create a single block device on a given node.
5023
5024   This will not recurse over children of the device, so they must be
5025   created in advance.
5026
5027   @param lu: the lu on whose behalf we execute
5028   @param node: the node on which to create the device
5029   @type instance: L{objects.Instance}
5030   @param instance: the instance which owns the device
5031   @type device: L{objects.Disk}
5032   @param device: the device to create
5033   @param info: the extra 'metadata' we should attach to the device
5034       (this will be represented as a LVM tag)
5035   @type force_open: boolean
5036   @param force_open: this parameter will be passes to the
5037       L{backend.BlockdevCreate} function where it specifies
5038       whether we run on primary or not, and it affects both
5039       the child assembly and the device own Open() execution
5040
5041   """
5042   lu.cfg.SetDiskID(device, node)
5043   result = lu.rpc.call_blockdev_create(node, device, device.size,
5044                                        instance.name, force_open, info)
5045   result.Raise("Can't create block device %s on"
5046                " node %s for instance %s" % (device, node, instance.name))
5047   if device.physical_id is None:
5048     device.physical_id = result.payload
5049
5050
5051 def _GenerateUniqueNames(lu, exts):
5052   """Generate a suitable LV name.
5053
5054   This will generate a logical volume name for the given instance.
5055
5056   """
5057   results = []
5058   for val in exts:
5059     new_id = lu.cfg.GenerateUniqueID()
5060     results.append("%s%s" % (new_id, val))
5061   return results
5062
5063
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Build a DRBD8 disk object together with its two LV children.

  A port and a shared secret are requested from the configuration and
  stored, along with the node names and minors, in the logical ID of
  the DRBD8 device.

  @param lu: the logical unit whose configuration is used
  @param primary: the primary node name
  @param secondary: the secondary node name
  @param size: size of the DRBD8 device and of its data volume
  @param names: sequence whose first two entries are the names of the
      data and of the metadata logical volume
  @param iv_name: the instance-visible name of the DRBD8 device
  @param p_minor: the DRBD minor to use on the primary node
  @param s_minor: the DRBD minor to use on the secondary node
  @rtype: L{objects.Disk}
  @return: the DRBD8 disk, with the data and metadata LVs as children

  """
  cfg = lu.cfg
  port = cfg.AllocatePort()
  vgname = cfg.GetVGName()
  secret = cfg.GenerateDRBDSecret()

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgname, names[0]))
  # the metadata volume has a fixed size of 128
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(vgname, names[1]))

  drbd_id = (primary, secondary, port, p_minor, s_minor, secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)
5083
5084
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  @param lu: the logical unit whose configuration is used
  @param template_name: one of the disk template constants
  @param instance_name: the instance name, used when allocating DRBD
      minors
  @param primary_node: the primary node name
  @param secondary_nodes: list of secondary node names; it must hold
      exactly one entry for the DRBD8 template and be empty for the
      plain and file templates
  @param disk_info: list of dicts with the C{size} and C{mode} keys,
      one per disk
  @param file_storage_dir: directory holding the disks of the file
      template
  @param file_driver: the driver stored in the logical ID of
      file-based disks
  @param base_index: index of the first generated disk, used for the
      volume names and the C{disk/N} instance-visible names
  @rtype: list
  @return: list of L{objects.Disk}, empty for the diskless template
  @raise errors.ProgrammerError: for an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []

  def _UniquePrefixes():
    """Return one unique '<id>.diskN' name per requested disk."""
    return _GenerateUniqueNames(lu, [".disk%d" % (base_index + offset)
                                     for offset in range(disk_count)])

  if template_name == constants.DT_DISKLESS:
    return disks

  if template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _UniquePrefixes()
    for idx, disk in enumerate(disk_info):
      disk_index = base_index + idx
      disks.append(objects.Disk(dev_type=constants.LD_LV,
                                size=disk["size"],
                                logical_id=(vgname, names[idx]),
                                iv_name="disk/%d" % disk_index,
                                mode=disk["mode"]))
    return disks

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk: one on the primary, one on the secondary
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    # two volume names per disk: data first, then metadata
    names = []
    for lv_prefix in _UniquePrefixes():
      names.extend([lv_prefix + "_data", lv_prefix + "_meta"])

    for idx, disk in enumerate(disk_info):
      disk_index = base_index + idx
      first = idx * 2
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[first:first + 2],
                                      "disk/%d" % disk_index,
                                      minors[first], minors[first + 1])
      drbd_dev.mode = disk["mode"]
      disks.append(drbd_dev)
    return disks

  if template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = base_index + idx
      disk_path = "%s/disk%d" % (file_storage_dir, disk_index)
      disks.append(objects.Disk(dev_type=constants.LD_FILE,
                                size=disk["size"],
                                iv_name="disk/%d" % disk_index,
                                logical_id=(file_driver, disk_path),
                                mode=disk["mode"]))
    return disks

  raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5149
5150
5151 def _GetInstanceInfoText(instance):
5152   """Compute that text that should be added to the disk's metadata.
5153
5154   """
5155   return "originstname+%s" % instance.name
5156
5157
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance. For file-based
  instances the storage directory is created first, on the primary
  (or overriding) node only.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation;
      the disks are then created on this node only
  @return: nothing is returned explicitly; failures are signalled by
      the C{Raise()} of the RPC result and by L{_CreateBlockDev}

  """
  info = _GetInstanceInfoText(instance)
  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template == constants.DT_FILE:
    # all file-based disks are expected under the directory of the first
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    dir_result.Raise("Failed to create directory '%s' on"
                     " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if idx in (to_skip or ()):
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      # creation and opening are only forced on the primary node
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
5201
5202
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For file-based instances the storage directory (taken from the path
  of the first disk) is removed as well, on the same node the disks
  were removed from.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the
      disks; when given, each disk is removed from this node only,
      otherwise from every node of its node tree
  @rtype: boolean
  @return: True if everything was removed, False if at least one
      removal failed (a warning is logged for each failure)

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = target_node or instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the removal was actually attempted on, which is
      # not the primary node when target_node overrides it
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
5250
5251
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group.

  @param disk_template: one of the disk template constants
  @param disks: list of dicts, each with a C{size} key
  @return: the total size needed in the volume group, or None for the
      diskless and file templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  # Required free disk space per template; both totals are computed
  # up-front, whatever template was requested
  size_by_template = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(entry["size"] for entry in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(entry["size"] + 128 for entry in disks),
    constants.DT_FILE: None,
  }

  try:
    return size_by_template[disk_template]
  except KeyError:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)
5270
5271
5272 def _CheckHVParams(lu, nodenames, hvname, hvparams):
5273   """Hypervisor parameter validation.
5274
5275   This function abstract the hypervisor parameter validation to be
5276   used in both instance create and instance modify.
5277
5278   @type lu: L{LogicalUnit}
5279   @param lu: the logical unit for which we check
5280   @type nodenames: list
5281   @param nodenames: the list of nodes on which we should check
5282   @type hvname: string
5283   @param hvname: the name of the hypervisor we should use
5284   @type hvparams: dict
5285   @param hvparams: the parameters which we need to check
5286   @raise errors.OpPrereqError: if the parameters are not valid
5287
5288   """
5289   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5290                                                   hvname,
5291                                                   hvparams)
5292   for node in nodenames:
5293     info = hvinfo[node]
5294     if info.offline:
5295       continue
5296     info.Raise("Hypervisor parameter validation failed on node %s" % node)
5297
5298
5299 class LUCreateInstance(LogicalUnit):
5300   """Create an instance.
5301
5302   """
5303   HPATH = "instance-add"
5304   HTYPE = constants.HTYPE_INSTANCE
5305   _OP_REQP = ["instance_name", "disks", "disk_template",
5306               "mode", "start",
5307               "wait_for_sync", "ip_check", "nics",
5308               "hvparams", "beparams"]
5309   REQ_BGL = False
5310
5311   def _ExpandNode(self, node):
5312     """Expands and checks one node name.
5313
5314     """
5315     node_full = self.cfg.ExpandNodeName(node)
5316     if node_full is None:
5317       raise errors.OpPrereqError("Unknown node %s" % node)
5318     return node_full
5319
5320   def ExpandNames(self):
5321     """ExpandNames for CreateInstance.
5322
5323     Figure out the right locks for instance creation.
5324
5325     """
5326     self.needed_locks = {}
5327
5328     # set optional parameters to none if they don't exist
5329     for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5330       if not hasattr(self.op, attr):
5331         setattr(self.op, attr, None)
5332
5333     # cheap checks, mostly valid constants given
5334
5335     # verify creation mode
5336     if self.op.mode not in (constants.INSTANCE_CREATE,
5337                             constants.INSTANCE_IMPORT):
5338       raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5339                                  self.op.mode)
5340
5341     # disk template and mirror node verification
5342     if self.op.disk_template not in constants.DISK_TEMPLATES:
5343       raise errors.OpPrereqError("Invalid disk template name")
5344
5345     if self.op.hypervisor is None:
5346       self.op.hypervisor = self.cfg.GetHypervisorType()
5347
5348     cluster = self.cfg.GetClusterInfo()
5349     enabled_hvs = cluster.enabled_hypervisors
5350     if self.op.hypervisor not in enabled_hvs:
5351       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5352                                  " cluster (%s)" % (self.op.hypervisor,
5353                                   ",".join(enabled_hvs)))
5354
5355     # check hypervisor parameter syntax (locally)
5356     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5357     filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5358                                   self.op.hvparams)
5359     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5360     hv_type.CheckParameterSyntax(filled_hvp)
5361     self.hv_full = filled_hvp
5362
5363     # fill and remember the beparams dict
5364     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5365     self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5366                                     self.op.beparams)
5367
5368     #### instance parameters check
5369
5370     # instance name verification
5371     hostname1 = utils.HostInfo(self.op.instance_name)
5372     self.op.instance_name = instance_name = hostname1.name
5373
5374     # this is just a preventive check, but someone might still add this
5375     # instance in the meantime, and creation will fail at lock-add time
5376     if instance_name in self.cfg.GetInstanceList():
5377       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5378                                  instance_name)
5379
5380     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5381
5382     # NIC buildup
5383     self.nics = []
5384     for idx, nic in enumerate(self.op.nics):
5385       nic_mode_req = nic.get("mode", None)
5386       nic_mode = nic_mode_req
5387       if nic_mode is None:
5388         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5389
5390       # in routed mode, for the first nic, the default ip is 'auto'
5391       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5392         default_ip_mode = constants.VALUE_AUTO
5393       else:
5394         default_ip_mode = constants.VALUE_NONE
5395
5396       # ip validity checks
5397       ip = nic.get("ip", default_ip_mode)
5398       if ip is None or ip.lower() == constants.VALUE_NONE:
5399         nic_ip = None
5400       elif ip.lower() == constants.VALUE_AUTO:
5401         nic_ip = hostname1.ip
5402       else:
5403         if not utils.IsValidIP(ip):
5404           raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5405                                      " like a valid IP" % ip)
5406         nic_ip = ip
5407
5408       # TODO: check the ip for uniqueness !!
5409       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5410         raise errors.OpPrereqError("Routed nic mode requires an ip address")
5411
5412       # MAC address verification
5413       mac = nic.get("mac", constants.VALUE_AUTO)
5414       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5415         if not utils.IsValidMac(mac.lower()):
5416           raise errors.OpPrereqError("Invalid MAC address specified: %s" %
5417                                      mac)
5418         else:
5419           # or validate/reserve the current one
5420           if self.cfg.IsMacInUse(mac):
5421             raise errors.OpPrereqError("MAC address %s already in use"
5422                                        " in cluster" % mac)
5423
5424       # bridge verification
5425       bridge = nic.get("bridge", None)
5426       link = nic.get("link", None)
5427       if bridge and link:
5428         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5429                                    " at the same time")
5430       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5431         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
5432       elif bridge:
5433         link = bridge
5434
5435       nicparams = {}
5436       if nic_mode_req:
5437         nicparams[constants.NIC_MODE] = nic_mode_req
5438       if link:
5439         nicparams[constants.NIC_LINK] = link
5440
5441       check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
5442                                       nicparams)
5443       objects.NIC.CheckParameterSyntax(check_params)
5444       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
5445
5446     # disk checks/pre-build
5447     self.disks = []
5448     for disk in self.op.disks:
5449       mode = disk.get("mode", constants.DISK_RDWR)
5450       if mode not in constants.DISK_ACCESS_SET:
5451         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
5452                                    mode)
5453       size = disk.get("size", None)
5454       if size is None:
5455         raise errors.OpPrereqError("Missing disk size")
5456       try:
5457         size = int(size)
5458       except ValueError:
5459         raise errors.OpPrereqError("Invalid disk size '%s'" % size)
5460       self.disks.append({"size": size, "mode": mode})
5461
5462     # used in CheckPrereq for ip ping check
5463     self.check_ip = hostname1.ip
5464
5465     # file storage checks
5466     if (self.op.file_driver and
5467         not self.op.file_driver in constants.FILE_DRIVER):
5468       raise errors.OpPrereqError("Invalid file driver name '%s'" %
5469                                  self.op.file_driver)
5470
5471     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
5472       raise errors.OpPrereqError("File storage directory path not absolute")
5473
5474     ### Node/iallocator related checks
5475     if [self.op.iallocator, self.op.pnode].count(None) != 1:
5476       raise errors.OpPrereqError("One and only one of iallocator and primary"
5477                                  " node must be given")
5478
5479     if self.op.iallocator:
5480       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5481     else:
5482       self.op.pnode = self._ExpandNode(self.op.pnode)
5483       nodelist = [self.op.pnode]
5484       if self.op.snode is not None:
5485         self.op.snode = self._ExpandNode(self.op.snode)
5486         nodelist.append(self.op.snode)
5487       self.needed_locks[locking.LEVEL_NODE] = nodelist
5488
5489     # in case of import lock the source node too
5490     if self.op.mode == constants.INSTANCE_IMPORT:
5491       src_node = getattr(self.op, "src_node", None)
5492       src_path = getattr(self.op, "src_path", None)
5493
5494       if src_path is None:
5495         self.op.src_path = src_path = self.op.instance_name
5496
5497       if src_node is None:
5498         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5499         self.op.src_node = None
5500         if os.path.isabs(src_path):
5501           raise errors.OpPrereqError("Importing an instance from an absolute"
5502                                      " path requires a source node option.")
5503       else:
5504         self.op.src_node = src_node = self._ExpandNode(src_node)
5505         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5506           self.needed_locks[locking.LEVEL_NODE].append(src_node)
5507         if not os.path.isabs(src_path):
5508           self.op.src_path = src_path = \
5509             os.path.join(constants.EXPORT_DIR, src_path)
5510
5511     else: # INSTANCE_CREATE
5512       if getattr(self.op, "os_type", None) is None:
5513         raise errors.OpPrereqError("No guest OS specified")
5514
5515   def _RunAllocator(self):
5516     """Run the allocator based on input opcode.
5517
5518     """
5519     nics = [n.ToDict() for n in self.nics]
5520     ial = IAllocator(self.cfg, self.rpc,
5521                      mode=constants.IALLOCATOR_MODE_ALLOC,
5522                      name=self.op.instance_name,
5523                      disk_template=self.op.disk_template,
5524                      tags=[],
5525                      os=self.op.os_type,
5526                      vcpus=self.be_full[constants.BE_VCPUS],
5527                      mem_size=self.be_full[constants.BE_MEMORY],
5528                      disks=self.disks,
5529                      nics=nics,
5530                      hypervisor=self.op.hypervisor,
5531                      )
5532
5533     ial.Run(self.op.iallocator)
5534
5535     if not ial.success:
5536       raise errors.OpPrereqError("Can't compute nodes using"
5537                                  " iallocator '%s': %s" % (self.op.iallocator,
5538                                                            ial.info))
5539     if len(ial.nodes) != ial.required_nodes:
5540       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5541                                  " of nodes (%s), required %s" %
5542                                  (self.op.iallocator, len(ial.nodes),
5543                                   ial.required_nodes))
5544     self.op.pnode = ial.nodes[0]
5545     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
5546                  self.op.instance_name, self.op.iallocator,
5547                  ", ".join(ial.nodes))
5548     if ial.required_nodes == 2:
5549       self.op.snode = ial.nodes[1]
5550
5551   def BuildHooksEnv(self):
5552     """Build hooks env.
5553
5554     This runs on master, primary and secondary nodes of the instance.
5555
5556     """
5557     env = {
5558       "ADD_MODE": self.op.mode,
5559       }
5560     if self.op.mode == constants.INSTANCE_IMPORT:
5561       env["SRC_NODE"] = self.op.src_node
5562       env["SRC_PATH"] = self.op.src_path
5563       env["SRC_IMAGES"] = self.src_images
5564
5565     env.update(_BuildInstanceHookEnv(
5566       name=self.op.instance_name,
5567       primary_node=self.op.pnode,
5568       secondary_nodes=self.secondaries,
5569       status=self.op.start,
5570       os_type=self.op.os_type,
5571       memory=self.be_full[constants.BE_MEMORY],
5572       vcpus=self.be_full[constants.BE_VCPUS],
5573       nics=_NICListToTuple(self, self.nics),
5574       disk_template=self.op.disk_template,
5575       disks=[(d["size"], d["mode"]) for d in self.disks],
5576       bep=self.be_full,
5577       hvp=self.hv_full,
5578       hypervisor_name=self.op.hypervisor,
5579     ))
5580
5581     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
5582           self.secondaries)
5583     return env, nl, nl
5584
5585
5586   def CheckPrereq(self):
5587     """Check prerequisites.
5588
5589     """
5590     if (not self.cfg.GetVGName() and
5591         self.op.disk_template not in constants.DTS_NOT_LVM):
5592       raise errors.OpPrereqError("Cluster does not support lvm-based"
5593                                  " instances")
5594
5595     if self.op.mode == constants.INSTANCE_IMPORT:
5596       src_node = self.op.src_node
5597       src_path = self.op.src_path
5598
5599       if src_node is None:
5600         locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
5601         exp_list = self.rpc.call_export_list(locked_nodes)
5602         found = False
5603         for node in exp_list:
5604           if exp_list[node].fail_msg:
5605             continue
5606           if src_path in exp_list[node].payload:
5607             found = True
5608             self.op.src_node = src_node = node
5609             self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
5610                                                        src_path)
5611             break
5612         if not found:
5613           raise errors.OpPrereqError("No export found for relative path %s" %
5614                                       src_path)
5615
5616       _CheckNodeOnline(self, src_node)
5617       result = self.rpc.call_export_info(src_node, src_path)
5618       result.Raise("No export or invalid export found in dir %s" % src_path)
5619
5620       export_info = objects.SerializableConfigParser.Loads(str(result.payload))
5621       if not export_info.has_section(constants.INISECT_EXP):
5622         raise errors.ProgrammerError("Corrupted export config")
5623
5624       ei_version = export_info.get(constants.INISECT_EXP, 'version')
5625       if (int(ei_version) != constants.EXPORT_VERSION):
5626         raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
5627                                    (ei_version, constants.EXPORT_VERSION))
5628
5629       # Check that the new instance doesn't have less disks than the export
5630       instance_disks = len(self.disks)
5631       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
5632       if instance_disks < export_disks:
5633         raise errors.OpPrereqError("Not enough disks to import."
5634                                    " (instance: %d, export: %d)" %
5635                                    (instance_disks, export_disks))
5636
5637       self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
5638       disk_images = []
5639       for idx in range(export_disks):
5640         option = 'disk%d_dump' % idx
5641         if export_info.has_option(constants.INISECT_INS, option):
5642           # FIXME: are the old os-es, disk sizes, etc. useful?
5643           export_name = export_info.get(constants.INISECT_INS, option)
5644           image = os.path.join(src_path, export_name)
5645           disk_images.append(image)
5646         else:
5647           disk_images.append(False)
5648
5649       self.src_images = disk_images
5650
5651       old_name = export_info.get(constants.INISECT_INS, 'name')
5652       # FIXME: int() here could throw a ValueError on broken exports
5653       exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
5654       if self.op.instance_name == old_name:
5655         for idx, nic in enumerate(self.nics):
5656           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
5657             nic_mac_ini = 'nic%d_mac' % idx
5658             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
5659
5660     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
5661     # ip ping checks (we use the same ip that was resolved in ExpandNames)
5662     if self.op.start and not self.op.ip_check:
5663       raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
5664                                  " adding an instance in start mode")
5665
5666     if self.op.ip_check:
5667       if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
5668         raise errors.OpPrereqError("IP %s of instance %s already in use" %
5669                                    (self.check_ip, self.op.instance_name))
5670
5671     #### mac address generation
5672     # By generating here the mac address both the allocator and the hooks get
5673     # the real final mac address rather than the 'auto' or 'generate' value.
5674     # There is a race condition between the generation and the instance object
5675     # creation, which means that we know the mac is valid now, but we're not
5676     # sure it will be when we actually add the instance. If things go bad
5677     # adding the instance will abort because of a duplicate mac, and the
5678     # creation job will fail.
5679     for nic in self.nics:
5680       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5681         nic.mac = self.cfg.GenerateMAC()
5682
5683     #### allocator run
5684
5685     if self.op.iallocator is not None:
5686       self._RunAllocator()
5687
5688     #### node related checks
5689
5690     # check primary node
5691     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
5692     assert self.pnode is not None, \
5693       "Cannot retrieve locked node %s" % self.op.pnode
5694     if pnode.offline:
5695       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
5696                                  pnode.name)
5697     if pnode.drained:
5698       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
5699                                  pnode.name)
5700
5701     self.secondaries = []
5702
5703     # mirror node verification
5704     if self.op.disk_template in constants.DTS_NET_MIRROR:
5705       if self.op.snode is None:
5706         raise errors.OpPrereqError("The networked disk templates need"
5707                                    " a mirror node")
5708       if self.op.snode == pnode.name:
5709         raise errors.OpPrereqError("The secondary node cannot be"
5710                                    " the primary node.")
5711       _CheckNodeOnline(self, self.op.snode)
5712       _CheckNodeNotDrained(self, self.op.snode)
5713       self.secondaries.append(self.op.snode)
5714
5715     nodenames = [pnode.name] + self.secondaries
5716
5717     req_size = _ComputeDiskSize(self.op.disk_template,
5718                                 self.disks)
5719
5720     # Check lv size requirements
5721     if req_size is not None:
5722       nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
5723                                          self.op.hypervisor)
5724       for node in nodenames:
5725         info = nodeinfo[node]
5726         info.Raise("Cannot get current information from node %s" % node)
5727         info = info.payload
5728         vg_free = info.get('vg_free', None)
5729         if not isinstance(vg_free, int):
5730           raise errors.OpPrereqError("Can't compute free disk space on"
5731                                      " node %s" % node)
5732         if req_size > vg_free:
5733           raise errors.OpPrereqError("Not enough disk space on target node %s."
5734                                      " %d MB available, %d MB required" %
5735                                      (node, vg_free, req_size))
5736
5737     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
5738
5739     # os verification
5740     result = self.rpc.call_os_get(pnode.name, self.op.os_type)
5741     result.Raise("OS '%s' not in supported os list for primary node %s" %
5742                  (self.op.os_type, pnode.name), prereq=True)
5743
5744     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
5745
5746     # memory check on primary node
5747     if self.op.start:
5748       _CheckNodeFreeMemory(self, self.pnode.name,
5749                            "creating instance %s" % self.op.instance_name,
5750                            self.be_full[constants.BE_MEMORY],
5751                            self.op.hypervisor)
5752
5753     self.dry_run_result = list(nodenames)
5754
5755   def Exec(self, feedback_fn):
5756     """Create and add the instance to the cluster.
5757
5758     """
5759     instance = self.op.instance_name
5760     pnode_name = self.pnode.name
5761
5762     ht_kind = self.op.hypervisor
5763     if ht_kind in constants.HTS_REQ_PORT:
5764       network_port = self.cfg.AllocatePort()
5765     else:
5766       network_port = None
5767
5768     ##if self.op.vnc_bind_address is None:
5769     ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
5770
5771     # this is needed because os.path.join does not accept None arguments
5772     if self.op.file_storage_dir is None:
5773       string_file_storage_dir = ""
5774     else:
5775       string_file_storage_dir = self.op.file_storage_dir
5776
5777     # build the full file storage dir path
5778     file_storage_dir = os.path.normpath(os.path.join(
5779                                         self.cfg.GetFileStorageDir(),
5780                                         string_file_storage_dir, instance))
5781
5782
5783     disks = _GenerateDiskTemplate(self,
5784                                   self.op.disk_template,
5785                                   instance, pnode_name,
5786                                   self.secondaries,
5787                                   self.disks,
5788                                   file_storage_dir,
5789                                   self.op.file_driver,
5790                                   0)
5791
5792     iobj = objects.Instance(name=instance, os=self.op.os_type,
5793                             primary_node=pnode_name,
5794                             nics=self.nics, disks=disks,
5795                             disk_template=self.op.disk_template,
5796                             admin_up=False,
5797                             network_port=network_port,
5798                             beparams=self.op.beparams,
5799                             hvparams=self.op.hvparams,
5800                             hypervisor=self.op.hypervisor,
5801                             )
5802
5803     feedback_fn("* creating instance disks...")
5804     try:
5805       _CreateDisks(self, iobj)
5806     except errors.OpExecError:
5807       self.LogWarning("Device creation failed, reverting...")
5808       try:
5809         _RemoveDisks(self, iobj)
5810       finally:
5811         self.cfg.ReleaseDRBDMinors(instance)
5812         raise
5813
5814     feedback_fn("adding instance %s to cluster config" % instance)
5815
5816     self.cfg.AddInstance(iobj)
5817     # Declare that we don't want to remove the instance lock anymore, as we've
5818     # added the instance to the config
5819     del self.remove_locks[locking.LEVEL_INSTANCE]
5820     # Unlock all the nodes
5821     if self.op.mode == constants.INSTANCE_IMPORT:
5822       nodes_keep = [self.op.src_node]
5823       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
5824                        if node != self.op.src_node]
5825       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
5826       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
5827     else:
5828       self.context.glm.release(locking.LEVEL_NODE)
5829       del self.acquired_locks[locking.LEVEL_NODE]
5830
5831     if self.op.wait_for_sync:
5832       disk_abort = not _WaitForSync(self, iobj)
5833     elif iobj.disk_template in constants.DTS_NET_MIRROR:
5834       # make sure the disks are not degraded (still sync-ing is ok)
5835       time.sleep(15)
5836       feedback_fn("* checking mirrors status")
5837       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
5838     else:
5839       disk_abort = False
5840
5841     if disk_abort:
5842       _RemoveDisks(self, iobj)
5843       self.cfg.RemoveInstance(iobj.name)
5844       # Make sure the instance lock gets removed
5845       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
5846       raise errors.OpExecError("There are some degraded disks for"
5847                                " this instance")
5848
5849     feedback_fn("creating os for instance %s on node %s" %
5850                 (instance, pnode_name))
5851
5852     if iobj.disk_template != constants.DT_DISKLESS:
5853       if self.op.mode == constants.INSTANCE_CREATE:
5854         feedback_fn("* running the instance OS create scripts...")
5855         result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
5856         result.Raise("Could not add os for instance %s"
5857                      " on node %s" % (instance, pnode_name))
5858
5859       elif self.op.mode == constants.INSTANCE_IMPORT:
5860         feedback_fn("* running the instance OS import scripts...")
5861         src_node = self.op.src_node
5862         src_images = self.src_images
5863         cluster_name = self.cfg.GetClusterName()
5864         import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
5865                                                          src_node, src_images,
5866                                                          cluster_name)
5867         msg = import_result.fail_msg
5868         if msg:
5869           self.LogWarning("Error while importing the disk images for instance"
5870                           " %s on node %s: %s" % (instance, pnode_name, msg))
5871       else:
5872         # also checked in the prereq part
5873         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
5874                                      % self.op.mode)
5875
5876     if self.op.start:
5877       iobj.admin_up = True
5878       self.cfg.Update(iobj)
5879       logging.info("Starting instance %s on node %s", instance, pnode_name)
5880       feedback_fn("* starting instance...")
5881       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
5882       result.Raise("Could not start instance")
5883
5884     return list(iobj.all_nodes)
5885
5886
class LUConnectConsole(NoHooksLU):
  """Compute the command needed to reach an instance's console.

  This LU is somewhat special: it does not connect to anything itself but
  returns the command line that has to be run on the master node in order
  to connect to the console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance is looked up in the configuration and its primary node
    is checked for being online.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

  def Exec(self, feedback_fn):
    """Build the ssh command line for the instance's console.

    @param feedback_fn: unused by this LU
    @return: the value of C{self.ssh.BuildCmd} for the console command
    @raise errors.OpExecError: if the instance is not in the list of
        instances reported by its primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    # ask the primary node which instances it currently knows about
    all_results = self.rpc.call_instance_list([pnode], [inst.hypervisor])
    running = all_results[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name not in running.payload:
      raise errors.OpExecError("Instance %s is not running." % inst.name)

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    hyper = hypervisor.GetHypervisor(inst.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # the filled hvparams and beparams are handed over separately, so that
    # the instance is not edited and the defaults are not saved in it
    filled_hv = cluster.FillHV(inst)
    filled_be = cluster.FillBE(inst)
    console_cmd = hyper.GetShellCommandForConsole(inst, filled_hv, filled_be)

    # build ssh cmdline
    return self.ssh.BuildCmd(pnode, "root", console_cmd, batch=True, tty=True)
5938
5939
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  The work itself is delegated to a single L{TLReplaceDisks} tasklet.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional opcode fields and validate their combination.

    """
    for optional in ("remote_node", "iallocator"):
      if not hasattr(self.op, optional):
        setattr(self.op, optional, None)

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create the replacement tasklet.

    @raise errors.OpPrereqError: if the given new secondary node is not
        known

    """
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      if self.op.remote_node is None:
        node_locks = []
        recalc_mode = constants.LOCKS_REPLACE
      else:
        expanded = self.cfg.ExpandNodeName(self.op.remote_node)
        if expanded is None:
          raise errors.OpPrereqError("Node '%s' not known" %
                                     self.op.remote_node)

        self.op.remote_node = expanded

        # Warning: do not remove the locking of the new secondary here
        # unless DRBD8.AddChildren is changed to work in parallel;
        # currently it doesn't since parallel invocations of
        # FindUnusedMinor will conflict
        node_locks = [expanded]
        recalc_mode = constants.LOCKS_APPEND

      self.needed_locks[locking.LEVEL_NODE] = node_locks
      self.recalculate_locks[locking.LEVEL_NODE] = recalc_mode

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Declare the instance's nodes unless all nodes are locked already.

    @param level: the locking level being declared

    """
    if level != locking.LEVEL_NODE:
      return
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      return
    # not locking all nodes in the set, so the instance's primary/secondary
    # nodes have to be declared
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list contains the master node, the primary node of the
    instance and, when one was given, the new secondary node.

    @return: tuple of (environment dict, node list, node list)

    """
    inst = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": inst.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]
    if self.op.remote_node is not None:
      hook_nodes.append(self.op.remote_node)

    return env, hook_nodes, hook_nodes
6016
6017
class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  For every secondary instance of the node a L{TLReplaceDisks} tasklet
  changing the secondary node is created.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional opcode fields and validate their combination.

    The check is done for the "change secondary" replacement mode, which
    is the mode used for all the tasklets of this LU.

    """
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create one tasklet per instance.

    @raise errors.OpPrereqError: if the node to evacuate or the new
        secondary node is not known, or if neither an iallocator nor a
        new secondary node was given

    """
    # Keep the expansion result in a local variable until it is known to be
    # valid, so that the error message shows the name given by the user
    # instead of "None"
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)

    self.op.node_name = node_name

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters")

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [])
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    """Declare the instances' nodes unless all nodes are locked already.

    @param level: the locking level being declared

    """
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list contains the master node and, when one was given, the
    new secondary node.

    @return: tuple of (environment dict, node list, node list)

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)
6109
6110
6111 class TLReplaceDisks(Tasklet):
6112   """Replaces disks for an instance.
6113
6114   Note: Locking is not within the scope of this class.
6115
6116   """
6117   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6118                disks):
6119     """Initializes this class.
6120
6121     """
6122     Tasklet.__init__(self, lu)
6123
6124     # Parameters
6125     self.instance_name = instance_name
6126     self.mode = mode
6127     self.iallocator_name = iallocator_name
6128     self.remote_node = remote_node
6129     self.disks = disks
6130
6131     # Runtime data
6132     self.instance = None
6133     self.new_node = None
6134     self.target_node = None
6135     self.other_node = None
6136     self.remote_node_info = None
6137     self.node_secondary_ip = None
6138
6139   @staticmethod
6140   def CheckArguments(mode, remote_node, iallocator):
6141     """Helper function for users of this class.
6142
6143     """
6144     # check for valid parameter combination
6145     if mode == constants.REPLACE_DISK_CHG:
6146       if remote_node is None and iallocator is None:
6147         raise errors.OpPrereqError("When changing the secondary either an"
6148                                    " iallocator script must be used or the"
6149                                    " new node given")
6150
6151       if remote_node is not None and iallocator is not None:
6152         raise errors.OpPrereqError("Give either the iallocator or the new"
6153                                    " secondary, not both")
6154
6155     elif remote_node is not None or iallocator is not None:
6156       # Not replacing the secondary
6157       raise errors.OpPrereqError("The iallocator and new node options can"
6158                                  " only be used when changing the"
6159                                  " secondary node")
6160
6161   @staticmethod
6162   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6163     """Compute a new secondary node using an IAllocator.
6164
6165     """
6166     ial = IAllocator(lu.cfg, lu.rpc,
6167                      mode=constants.IALLOCATOR_MODE_RELOC,
6168                      name=instance_name,
6169                      relocate_from=relocate_from)
6170
6171     ial.Run(iallocator_name)
6172
6173     if not ial.success:
6174       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6175                                  " %s" % (iallocator_name, ial.info))
6176
6177     if len(ial.nodes) != ial.required_nodes:
6178       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6179                                  " of nodes (%s), required %s" %
6180                                  (len(ial.nodes), ial.required_nodes))
6181
6182     remote_node_name = ial.nodes[0]
6183
6184     lu.LogInfo("Selected new secondary for instance '%s': %s",
6185                instance_name, remote_node_name)
6186
6187     return remote_node_name
6188
6189   def _FindFaultyDisks(self, node_name):
6190     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
6191                                     node_name, True)
6192
6193   def CheckPrereq(self):
6194     """Check prerequisites.
6195
6196     This checks that the instance is in the cluster.
6197
6198     """
6199     self.instance = self.cfg.GetInstanceInfo(self.instance_name)
6200     assert self.instance is not None, \
6201       "Cannot retrieve locked instance %s" % self.instance_name
6202
6203     if self.instance.disk_template != constants.DT_DRBD8:
6204       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
6205                                  " instances")
6206
6207     if len(self.instance.secondary_nodes) != 1:
6208       raise errors.OpPrereqError("The instance has a strange layout,"
6209                                  " expected one secondary but found %d" %
6210                                  len(self.instance.secondary_nodes))
6211
6212     secondary_node = self.instance.secondary_nodes[0]
6213
6214     if self.iallocator_name is None:
6215       remote_node = self.remote_node
6216     else:
6217       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
6218                                        self.instance.name, secondary_node)
6219
6220     if remote_node is not None:
6221       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
6222       assert self.remote_node_info is not None, \
6223         "Cannot retrieve locked node %s" % remote_node
6224     else:
6225       self.remote_node_info = None
6226
6227     if remote_node == self.instance.primary_node:
6228       raise errors.OpPrereqError("The specified node is the primary node of"
6229                                  " the instance.")
6230
6231     if remote_node == secondary_node:
6232       raise errors.OpPrereqError("The specified node is already the"
6233                                  " secondary node of the instance.")
6234
6235     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
6236                                     constants.REPLACE_DISK_CHG):
6237       raise errors.OpPrereqError("Cannot specify disks to be replaced")
6238
6239     if self.mode == constants.REPLACE_DISK_AUTO:
6240       faulty_primary = self._FindFaultyDisks(self.instance.primary_node)
6241       faulty_secondary = self._FindFaultyDisks(secondary_node)
6242
6243       if faulty_primary and faulty_secondary:
6244         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
6245                                    " one node and can not be repaired"
6246                                    " automatically" % self.instance_name)
6247
6248       if faulty_primary:
6249         self.disks = faulty_primary
6250         self.target_node = self.instance.primary_node
6251         self.other_node = secondary_node
6252         check_nodes = [self.target_node, self.other_node]
6253       elif faulty_secondary:
6254         self.disks = faulty_secondary
6255         self.target_node = secondary_node
6256         self.other_node = self.instance.primary_node
6257         check_nodes = [self.target_node, self.other_node]
6258       else:
6259         self.disks = []
6260         check_nodes = []
6261
6262     else:
6263       # Non-automatic modes
6264       if self.mode == constants.REPLACE_DISK_PRI:
6265         self.target_node = self.instance.primary_node
6266         self.other_node = secondary_node
6267         check_nodes = [self.target_node, self.other_node]
6268
6269       elif self.mode == constants.REPLACE_DISK_SEC:
6270         self.target_node = secondary_node
6271         self.other_node = self.instance.primary_node
6272         check_nodes = [self.target_node, self.other_node]
6273
6274       elif self.mode == constants.REPLACE_DISK_CHG:
6275         self.new_node = remote_node
6276         self.other_node = self.instance.primary_node
6277         self.target_node = secondary_node
6278         check_nodes = [self.new_node, self.other_node]
6279
6280         _CheckNodeNotDrained(self.lu, remote_node)
6281
6282       else:
6283         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
6284                                      self.mode)
6285
6286       # If not specified all disks should be replaced
6287       if not self.disks:
6288         self.disks = range(len(self.instance.disks))
6289
6290     for node in check_nodes:
6291       _CheckNodeOnline(self.lu, node)
6292
6293     # Check whether disks are valid
6294     for disk_idx in self.disks:
6295       self.instance.FindDisk(disk_idx)
6296
6297     # Get secondary node IP addresses
6298     node_2nd_ip = {}
6299
6300     for node_name in [self.target_node, self.other_node, self.new_node]:
6301       if node_name is not None:
6302         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
6303
6304     self.node_secondary_ip = node_2nd_ip
6305
6306   def Exec(self, feedback_fn):
6307     """Execute disk replacement.
6308
6309     This dispatches the disk replacement to the appropriate handler.
6310
6311     """
6312     if not self.disks:
6313       feedback_fn("No disks need replacement")
6314       return
6315
6316     feedback_fn("Replacing disk(s) %s for %s" %
6317                 (", ".join([str(i) for i in self.disks]), self.instance.name))
6318
6319     activate_disks = (not self.instance.admin_up)
6320
6321     # Activate the instance disks if we're replacing them on a down instance
6322     if activate_disks:
6323       _StartInstanceDisks(self.lu, self.instance, True)
6324
6325     try:
6326       # Should we replace the secondary node?
6327       if self.new_node is not None:
6328         return self._ExecDrbd8Secondary()
6329       else:
6330         return self._ExecDrbd8DiskOnly()
6331
6332     finally:
6333       # Deactivate the instance disks if we're replacing them on a down instance
6334       if activate_disks:
6335         _SafeShutdownInstanceDisks(self.lu, self.instance)
6336
6337   def _CheckVolumeGroup(self, nodes):
6338     self.lu.LogInfo("Checking volume groups")
6339
6340     vgname = self.cfg.GetVGName()
6341
6342     # Make sure volume group exists on all involved nodes
6343     results = self.rpc.call_vg_list(nodes)
6344     if not results:
6345       raise errors.OpExecError("Can't list volume groups on the nodes")
6346
6347     for node in nodes:
6348       res = results[node]
6349       res.Raise("Error checking node %s" % node)
6350       if vgname not in res.payload:
6351         raise errors.OpExecError("Volume group '%s' not found on node %s" %
6352                                  (vgname, node))
6353
6354   def _CheckDisksExistence(self, nodes):
6355     # Check disk existence
6356     for idx, dev in enumerate(self.instance.disks):
6357       if idx not in self.disks:
6358         continue
6359
6360       for node in nodes:
6361         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
6362         self.cfg.SetDiskID(dev, node)
6363
6364         result = self.rpc.call_blockdev_find(node, dev)
6365
6366         msg = result.fail_msg
6367         if msg or not result.payload:
6368           if not msg:
6369             msg = "disk not found"
6370           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
6371                                    (idx, node, msg))
6372
6373   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
6374     for idx, dev in enumerate(self.instance.disks):
6375       if idx not in self.disks:
6376         continue
6377
6378       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
6379                       (idx, node_name))
6380
6381       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
6382                                    ldisk=ldisk):
6383         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
6384                                  " replace disks for instance %s" %
6385                                  (node_name, self.instance.name))
6386
6387   def _CreateNewStorage(self, node_name):
6388     vgname = self.cfg.GetVGName()
6389     iv_names = {}
6390
6391     for idx, dev in enumerate(self.instance.disks):
6392       if idx not in self.disks:
6393         continue
6394
6395       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
6396
6397       self.cfg.SetDiskID(dev, node_name)
6398
6399       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
6400       names = _GenerateUniqueNames(self.lu, lv_names)
6401
6402       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
6403                              logical_id=(vgname, names[0]))
6404       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6405                              logical_id=(vgname, names[1]))
6406
6407       new_lvs = [lv_data, lv_meta]
6408       old_lvs = dev.children
6409       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
6410
6411       # we pass force_create=True to force the LVM creation
6412       for new_lv in new_lvs:
6413         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
6414                         _GetInstanceInfoText(self.instance), False)
6415
6416     return iv_names
6417
6418   def _CheckDevices(self, node_name, iv_names):
6419     for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
6420       self.cfg.SetDiskID(dev, node_name)
6421
6422       result = self.rpc.call_blockdev_find(node_name, dev)
6423
6424       msg = result.fail_msg
6425       if msg or not result.payload:
6426         if not msg:
6427           msg = "disk not found"
6428         raise errors.OpExecError("Can't find DRBD device %s: %s" %
6429                                  (name, msg))
6430
6431       if result.payload.is_degraded:
6432         raise errors.OpExecError("DRBD device %s is degraded!" % name)
6433
6434   def _RemoveOldStorage(self, node_name, iv_names):
6435     for name, (dev, old_lvs, _) in iv_names.iteritems():
6436       self.lu.LogInfo("Remove logical volumes for %s" % name)
6437
6438       for lv in old_lvs:
6439         self.cfg.SetDiskID(lv, node_name)
6440
6441         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
6442         if msg:
6443           self.lu.LogWarning("Can't remove old LV: %s" % msg,
6444                              hint="remove unused LVs manually")
6445
6446   def _ExecDrbd8DiskOnly(self):
6447     """Replace a disk on the primary or secondary for DRBD 8.
6448
6449     The algorithm for replace is quite complicated:
6450
6451       1. for each disk to be replaced:
6452
6453         1. create new LVs on the target node with unique names
6454         1. detach old LVs from the drbd device
6455         1. rename old LVs to name_replaced.<time_t>
6456         1. rename new LVs to old LVs
6457         1. attach the new LVs (with the old names now) to the drbd device
6458
6459       1. wait for sync across all devices
6460
6461       1. for each modified disk:
6462
6463         1. remove old LVs (which have the name name_replaces.<time_t>)
6464
6465     Failures are not very well handled.
6466
6467     """
6468     steps_total = 6
6469
6470     # Step: check device activation
6471     self.lu.LogStep(1, steps_total, "Check device existence")
6472     self._CheckDisksExistence([self.other_node, self.target_node])
6473     self._CheckVolumeGroup([self.target_node, self.other_node])
6474
6475     # Step: check other node consistency
6476     self.lu.LogStep(2, steps_total, "Check peer consistency")
6477     self._CheckDisksConsistency(self.other_node,
6478                                 self.other_node == self.instance.primary_node,
6479                                 False)
6480
6481     # Step: create new storage
6482     self.lu.LogStep(3, steps_total, "Allocate new storage")
6483     iv_names = self._CreateNewStorage(self.target_node)
6484
6485     # Step: for each lv, detach+rename*2+attach
6486     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6487     for dev, old_lvs, new_lvs in iv_names.itervalues():
6488       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
6489
6490       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
6491                                                      old_lvs)
6492       result.Raise("Can't detach drbd from local storage on node"
6493                    " %s for device %s" % (self.target_node, dev.iv_name))
6494       #dev.children = []
6495       #cfg.Update(instance)
6496
6497       # ok, we created the new LVs, so now we know we have the needed
6498       # storage; as such, we proceed on the target node to rename
6499       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
6500       # using the assumption that logical_id == physical_id (which in
6501       # turn is the unique_id on that node)
6502
6503       # FIXME(iustin): use a better name for the replaced LVs
6504       temp_suffix = int(time.time())
6505       ren_fn = lambda d, suff: (d.physical_id[0],
6506                                 d.physical_id[1] + "_replaced-%s" % suff)
6507
6508       # Build the rename list based on what LVs exist on the node
6509       rename_old_to_new = []
6510       for to_ren in old_lvs:
6511         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
6512         if not result.fail_msg and result.payload:
6513           # device exists
6514           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
6515
6516       self.lu.LogInfo("Renaming the old LVs on the target node")
6517       result = self.rpc.call_blockdev_rename(self.target_node,
6518                                              rename_old_to_new)
6519       result.Raise("Can't rename old LVs on node %s" % self.target_node)
6520
6521       # Now we rename the new LVs to the old LVs
6522       self.lu.LogInfo("Renaming the new LVs on the target node")
6523       rename_new_to_old = [(new, old.physical_id)
6524                            for old, new in zip(old_lvs, new_lvs)]
6525       result = self.rpc.call_blockdev_rename(self.target_node,
6526                                              rename_new_to_old)
6527       result.Raise("Can't rename new LVs on node %s" % self.target_node)
6528
6529       for old, new in zip(old_lvs, new_lvs):
6530         new.logical_id = old.logical_id
6531         self.cfg.SetDiskID(new, self.target_node)
6532
6533       for disk in old_lvs:
6534         disk.logical_id = ren_fn(disk, temp_suffix)
6535         self.cfg.SetDiskID(disk, self.target_node)
6536
6537       # Now that the new lvs have the old name, we can add them to the device
6538       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
6539       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
6540                                                   new_lvs)
6541       msg = result.fail_msg
6542       if msg:
6543         for new_lv in new_lvs:
6544           msg2 = self.rpc.call_blockdev_remove(self.target_node,
6545                                                new_lv).fail_msg
6546           if msg2:
6547             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
6548                                hint=("cleanup manually the unused logical"
6549                                      "volumes"))
6550         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
6551
6552       dev.children = new_lvs
6553
6554       self.cfg.Update(self.instance)
6555
6556     # Wait for sync
6557     # This can fail as the old devices are degraded and _WaitForSync
6558     # does a combined result over all disks, so we don't check its return value
6559     self.lu.LogStep(5, steps_total, "Sync devices")
6560     _WaitForSync(self.lu, self.instance, unlock=True)
6561
6562     # Check all devices manually
6563     self._CheckDevices(self.instance.primary_node, iv_names)
6564
6565     # Step: remove old storage
6566     self.lu.LogStep(6, steps_total, "Removing old storage")
6567     self._RemoveOldStorage(self.target_node, iv_names)
6568
6569   def _ExecDrbd8Secondary(self):
6570     """Replace the secondary node for DRBD 8.
6571
6572     The algorithm for replace is quite complicated:
6573       - for all disks of the instance:
6574         - create new LVs on the new node with same names
6575         - shutdown the drbd device on the old secondary
6576         - disconnect the drbd network on the primary
6577         - create the drbd device on the new secondary
6578         - network attach the drbd on the primary, using an artifice:
6579           the drbd code for Attach() will connect to the network if it
6580           finds a device which is connected to the good local disks but
6581           not network enabled
6582       - wait for sync across all devices
6583       - remove all disks from the old secondary
6584
6585     Failures are not very well handled.
6586
6587     """
6588     steps_total = 6
6589
6590     # Step: check device activation
6591     self.lu.LogStep(1, steps_total, "Check device existence")
6592     self._CheckDisksExistence([self.instance.primary_node])
6593     self._CheckVolumeGroup([self.instance.primary_node])
6594
6595     # Step: check other node consistency
6596     self.lu.LogStep(2, steps_total, "Check peer consistency")
6597     self._CheckDisksConsistency(self.instance.primary_node, True, True)
6598
6599     # Step: create new storage
6600     self.lu.LogStep(3, steps_total, "Allocate new storage")
6601     for idx, dev in enumerate(self.instance.disks):
6602       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
6603                       (self.new_node, idx))
6604       # we pass force_create=True to force LVM creation
6605       for new_lv in dev.children:
6606         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
6607                         _GetInstanceInfoText(self.instance), False)
6608
6609     # Step 4: dbrd minors and drbd setups changes
6610     # after this, we must manually remove the drbd minors on both the
6611     # error and the success paths
6612     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6613     minors = self.cfg.AllocateDRBDMinor([self.new_node
6614                                          for dev in self.instance.disks],
6615                                         self.instance.name)
6616     logging.debug("Allocated minors %r" % (minors,))
6617
6618     iv_names = {}
6619     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
6620       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
6621                       (self.new_node, idx))
6622       # create new devices on new_node; note that we create two IDs:
6623       # one without port, so the drbd will be activated without
6624       # networking information on the new node at this stage, and one
6625       # with network, for the latter activation in step 4
6626       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
6627       if self.instance.primary_node == o_node1:
6628         p_minor = o_minor1
6629       else:
6630         p_minor = o_minor2
6631
6632       new_alone_id = (self.instance.primary_node, self.new_node, None,
6633                       p_minor, new_minor, o_secret)
6634       new_net_id = (self.instance.primary_node, self.new_node, o_port,
6635                     p_minor, new_minor, o_secret)
6636
6637       iv_names[idx] = (dev, dev.children, new_net_id)
6638       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
6639                     new_net_id)
6640       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
6641                               logical_id=new_alone_id,
6642                               children=dev.children,
6643                               size=dev.size)
6644       try:
6645         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
6646                               _GetInstanceInfoText(self.instance), False)
6647       except errors.GenericError:
6648         self.cfg.ReleaseDRBDMinors(self.instance.name)
6649         raise
6650
6651     # We have new devices, shutdown the drbd on the old secondary
6652     for idx, dev in enumerate(self.instance.disks):
6653       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
6654       self.cfg.SetDiskID(dev, self.target_node)
6655       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
6656       if msg:
6657         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
6658                            "node: %s" % (idx, msg),
6659                            hint=("Please cleanup this device manually as"
6660                                  " soon as possible"))
6661
6662     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
6663     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
6664                                                self.node_secondary_ip,
6665                                                self.instance.disks)\
6666                                               [self.instance.primary_node]
6667
6668     msg = result.fail_msg
6669     if msg:
6670       # detaches didn't succeed (unlikely)
6671       self.cfg.ReleaseDRBDMinors(self.instance.name)
6672       raise errors.OpExecError("Can't detach the disks from the network on"
6673                                " old node: %s" % (msg,))
6674
6675     # if we managed to detach at least one, we update all the disks of
6676     # the instance to point to the new secondary
6677     self.lu.LogInfo("Updating instance configuration")
6678     for dev, _, new_logical_id in iv_names.itervalues():
6679       dev.logical_id = new_logical_id
6680       self.cfg.SetDiskID(dev, self.instance.primary_node)
6681
6682     self.cfg.Update(self.instance)
6683
6684     # and now perform the drbd attach
6685     self.lu.LogInfo("Attaching primary drbds to new secondary"
6686                     " (standalone => connected)")
6687     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
6688                                             self.new_node],
6689                                            self.node_secondary_ip,
6690                                            self.instance.disks,
6691                                            self.instance.name,
6692                                            False)
6693     for to_node, to_result in result.items():
6694       msg = to_result.fail_msg
6695       if msg:
6696         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
6697                            to_node, msg,
6698                            hint=("please do a gnt-instance info to see the"
6699                                  " status of disks"))
6700
6701     # Wait for sync
6702     # This can fail as the old devices are degraded and _WaitForSync
6703     # does a combined result over all disks, so we don't check its return value
6704     self.lu.LogStep(5, steps_total, "Sync devices")
6705     _WaitForSync(self.lu, self.instance, unlock=True)
6706
6707     # Check all devices manually
6708     self._CheckDevices(self.instance.primary_node, iv_names)
6709
6710     # Step: remove old storage
6711     self.lu.LogStep(6, steps_total, "Removing old storage")
6712     self._RemoveOldStorage(self.target_node, iv_names)
6713
6714
class LURepairNodeStorage(NoHooksLU):
  """Runs the consistency-fix operation on a storage unit of a node.

  """
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Replace the node name of the opcode with its expanded form.

    @raise errors.OpPrereqError: if the node name cannot be expanded

    """
    expanded_name = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)

    self.op.node_name = expanded_name

  def ExpandNames(self):
    """Declare the lock on the node being repaired.

    """
    self.needed_locks = {locking.LEVEL_NODE: [self.op.node_name]}

  def _CheckFaultyDisks(self, instance, node_name):
    """Refuse to continue if the instance has faulty disks on a node.

    @param instance: the instance whose disks are checked
    @param node_name: the node on which the disks are checked
    @raise errors.OpPrereqError: if faulty disks are reported

    """
    faulty = _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                      node_name, True)
    if faulty:
      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                 " node '%s'" % (instance.name, node_name))

  def CheckPrereq(self):
    """Check prerequisites.

    The storage type must support the fix-consistency operation, and
    no instance using this node may have faulty disks on any of its
    other nodes.

    """
    storage_type = self.op.storage_type
    allowed_ops = constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])

    if constants.SO_FIX_CONSISTENCY not in allowed_ops:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type)

    # For every instance on this node, look at its disks on the *other*
    # nodes it uses
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      other_nodes = set(inst.all_nodes)
      other_nodes.discard(self.op.node_name)
      for other_node in other_nodes:
        self._CheckFaultyDisks(inst, other_node)

  def Exec(self, feedback_fn):
    """Execute the repair on the node.

    """
    unit_name = self.op.name
    node_name = self.op.node_name
    storage_type = self.op.storage_type

    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (unit_name, node_name))

    st_args = _GetStorageTypeArgs(self.cfg, storage_type)
    result = self.rpc.call_storage_execute(node_name, storage_type, st_args,
                                           unit_name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (unit_name, node_name))
6769
6770
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list returned for the hooks contains the master and the
    primary node of the instance.

    """
    hook_env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode(), self.instance.primary_node]
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that all nodes of the instance are online, that the
    disk template supports growing, that the requested disk exists and
    that every node reports enough free space in the volume group.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    nodenames = list(instance.all_nodes)
    for node_name in nodenames:
      _CheckNodeOnline(self, node_name)

    self.instance = instance

    growable_templates = (constants.DT_PLAIN, constants.DT_DRBD8)
    if instance.disk_template not in growable_templates:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.")

    self.disk = instance.FindDisk(self.op.disk)

    vg_name = self.cfg.GetVGName()
    all_info = self.rpc.call_node_info(nodenames, vg_name,
                                       instance.hypervisor)
    for node_name in nodenames:
      node_result = all_info[node_name]
      node_result.Raise("Cannot get current information from node %s" %
                        node_name)
      vg_free = node_result.payload.get('vg_free', None)
      if not isinstance(vg_free, int):
        raise errors.OpPrereqError("Can't compute free disk space on"
                                   " node %s" % node_name)
      if self.op.amount > vg_free:
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                   " %d MiB available, %d MiB required" %
                                   (node_name, vg_free, self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    The grow request is sent to every node of the instance; only then
    is the new size recorded and the configuration updated.

    """
    instance = self.instance
    disk = self.disk
    amount = self.op.amount

    for node_name in instance.all_nodes:
      self.cfg.SetDiskID(disk, node_name)
      grow_result = self.rpc.call_blockdev_grow(node_name, disk, amount)
      grow_result.Raise("Grow request failed to node %s" % node_name)

    disk.RecordGrow(amount)
    self.cfg.Update(instance)

    if not self.op.wait_for_sync:
      return

    if not _WaitForSync(self, instance):
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")
6859
6860
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the requested instance names and declare the locks.

    An empty instance list selects all instances.

    @raise errors.OpPrereqError: if the instances argument is not a
        list or one of the names is not known

    """
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    requested = self.op.instances
    if not isinstance(requested, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if not requested:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
    else:
      expanded = self.wanted_names = []
      for short_name in requested:
        full_name = self.cfg.ExpandInstanceName(short_name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % short_name)
        expanded.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = expanded

    # the node locks are computed in DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This resolves the wanted names (falling back to the acquired
    instance locks when none were given) to instance objects.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    instances = []
    for inst_name in self.wanted_names:
      instances.append(self.cfg.GetInstanceInfo(inst_name))
    self.wanted_instances = instances

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    @return: None for static queries, when no node is given, when the
        node is marked offline in the result or when no status is
        returned; otherwise a tuple of (dev_path, major, minor,
        sync_percent, estimated_time, is_degraded, ldisk_status)

    """
    if self.op.static:
      return None
    if not node:
      return None

    self.cfg.SetDiskID(dev, node)

    find_result = self.rpc.call_blockdev_find(node, dev)
    if find_result.offline:
      return None

    find_result.Raise("Can't compute disk status for %s" % instance_name)

    bdev = find_result.payload
    if bdev is None:
      return None

    return (bdev.dev_path, bdev.major, bdev.minor,
            bdev.sync_percent, bdev.estimated_time,
            bdev.is_degraded, bdev.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    The children of the device are processed recursively.

    @return: dict describing the device, with its status on the
        primary node ("pstatus") and on the secondary node ("sstatus")

    """
    if dev.dev_type in constants.LDS_DRBD:
      # for drbd the secondary is taken from the device itself (it is the
      # node of the pair which is not the primary), overriding the argument
      first_node = dev.logical_id[0]
      if first_node == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = first_node

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    children_status = []
    if dev.children:
      for child in dev.children:
        children_status.append(self._ComputeDiskStatus(instance, snode,
                                                       child))

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": children_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data

    @return: dict mapping each instance name to a dict with its data

    """
    cluster = self.cfg.GetClusterInfo()
    result = {}

    for instance in self.wanted_instances:
      if self.op.static:
        # static queries do not contact the node
        remote_state = None
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        live_data = remote_info.payload
        if live_data and "state" in live_data:
          remote_state = "up"
        else:
          remote_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = []
      for device in instance.disks:
        disks.append(self._ComputeDiskStatus(instance, None, device))

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
7017
7018
7019 class LUSetInstanceParams(LogicalUnit):
7020   """Modifies an instances's parameters.
7021
7022   """
7023   HPATH = "instance-modify"
7024   HTYPE = constants.HTYPE_INSTANCE
7025   _OP_REQP = ["instance_name"]
7026   REQ_BGL = False
7027
7028   def CheckArguments(self):
7029     if not hasattr(self.op, 'nics'):
7030       self.op.nics = []
7031     if not hasattr(self.op, 'disks'):
7032       self.op.disks = []
7033     if not hasattr(self.op, 'beparams'):
7034       self.op.beparams = {}
7035     if not hasattr(self.op, 'hvparams'):
7036       self.op.hvparams = {}
7037     self.op.force = getattr(self.op, "force", False)
7038     if not (self.op.nics or self.op.disks or
7039             self.op.hvparams or self.op.beparams):
7040       raise errors.OpPrereqError("No changes submitted")
7041
7042     # Disk validation
7043     disk_addremove = 0
7044     for disk_op, disk_dict in self.op.disks:
7045       if disk_op == constants.DDM_REMOVE:
7046         disk_addremove += 1
7047         continue
7048       elif disk_op == constants.DDM_ADD:
7049         disk_addremove += 1
7050       else:
7051         if not isinstance(disk_op, int):
7052           raise errors.OpPrereqError("Invalid disk index")
7053         if not isinstance(disk_dict, dict):
7054           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7055           raise errors.OpPrereqError(msg)
7056
7057       if disk_op == constants.DDM_ADD:
7058         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7059         if mode not in constants.DISK_ACCESS_SET:
7060           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
7061         size = disk_dict.get('size', None)
7062         if size is None:
7063           raise errors.OpPrereqError("Required disk parameter size missing")
7064         try:
7065           size = int(size)
7066         except ValueError, err:
7067           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7068                                      str(err))
7069         disk_dict['size'] = size
7070       else:
7071         # modification of disk
7072         if 'size' in disk_dict:
7073           raise errors.OpPrereqError("Disk size change not possible, use"
7074                                      " grow-disk")
7075
7076     if disk_addremove > 1:
7077       raise errors.OpPrereqError("Only one disk add or remove operation"
7078                                  " supported at a time")
7079
7080     # NIC validation
7081     nic_addremove = 0
7082     for nic_op, nic_dict in self.op.nics:
7083       if nic_op == constants.DDM_REMOVE:
7084         nic_addremove += 1
7085         continue
7086       elif nic_op == constants.DDM_ADD:
7087         nic_addremove += 1
7088       else:
7089         if not isinstance(nic_op, int):
7090           raise errors.OpPrereqError("Invalid nic index")
7091         if not isinstance(nic_dict, dict):
7092           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7093           raise errors.OpPrereqError(msg)
7094
7095       # nic_dict should be a dict
7096       nic_ip = nic_dict.get('ip', None)
7097       if nic_ip is not None:
7098         if nic_ip.lower() == constants.VALUE_NONE:
7099           nic_dict['ip'] = None
7100         else:
7101           if not utils.IsValidIP(nic_ip):
7102             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
7103
7104       nic_bridge = nic_dict.get('bridge', None)
7105       nic_link = nic_dict.get('link', None)
7106       if nic_bridge and nic_link:
7107         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7108                                    " at the same time")
7109       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7110         nic_dict['bridge'] = None
7111       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7112         nic_dict['link'] = None
7113
7114       if nic_op == constants.DDM_ADD:
7115         nic_mac = nic_dict.get('mac', None)
7116         if nic_mac is None:
7117           nic_dict['mac'] = constants.VALUE_AUTO
7118
7119       if 'mac' in nic_dict:
7120         nic_mac = nic_dict['mac']
7121         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7122           if not utils.IsValidMac(nic_mac):
7123             raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
7124         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7125           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7126                                      " modifying an existing nic")
7127
7128     if nic_addremove > 1:
7129       raise errors.OpPrereqError("Only one NIC add or remove operation"
7130                                  " supported at a time")
7131
7132   def ExpandNames(self):
7133     self._ExpandAndLockInstance()
7134     self.needed_locks[locking.LEVEL_NODE] = []
7135     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7136
7137   def DeclareLocks(self, level):
7138     if level == locking.LEVEL_NODE:
7139       self._LockInstancesNodes()
7140
7141   def BuildHooksEnv(self):
7142     """Build hooks env.
7143
7144     This runs on the master, primary and secondaries.
7145
7146     """
7147     args = dict()
7148     if constants.BE_MEMORY in self.be_new:
7149       args['memory'] = self.be_new[constants.BE_MEMORY]
7150     if constants.BE_VCPUS in self.be_new:
7151       args['vcpus'] = self.be_new[constants.BE_VCPUS]
7152     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7153     # information at all.
7154     if self.op.nics:
7155       args['nics'] = []
7156       nic_override = dict(self.op.nics)
7157       c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7158       for idx, nic in enumerate(self.instance.nics):
7159         if idx in nic_override:
7160           this_nic_override = nic_override[idx]
7161         else:
7162           this_nic_override = {}
7163         if 'ip' in this_nic_override:
7164           ip = this_nic_override['ip']
7165         else:
7166           ip = nic.ip
7167         if 'mac' in this_nic_override:
7168           mac = this_nic_override['mac']
7169         else:
7170           mac = nic.mac
7171         if idx in self.nic_pnew:
7172           nicparams = self.nic_pnew[idx]
7173         else:
7174           nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7175         mode = nicparams[constants.NIC_MODE]
7176         link = nicparams[constants.NIC_LINK]
7177         args['nics'].append((ip, mac, mode, link))
7178       if constants.DDM_ADD in nic_override:
7179         ip = nic_override[constants.DDM_ADD].get('ip', None)
7180         mac = nic_override[constants.DDM_ADD]['mac']
7181         nicparams = self.nic_pnew[constants.DDM_ADD]
7182         mode = nicparams[constants.NIC_MODE]
7183         link = nicparams[constants.NIC_LINK]
7184         args['nics'].append((ip, mac, mode, link))
7185       elif constants.DDM_REMOVE in nic_override:
7186         del args['nics'][-1]
7187
7188     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7189     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7190     return env, nl, nl
7191
7192   def _GetUpdatedParams(self, old_params, update_dict,
7193                         default_values, parameter_types):
7194     """Return the new params dict for the given params.
7195
7196     @type old_params: dict
7197     @param old_params: old parameters
7198     @type update_dict: dict
7199     @param update_dict: dict containing new parameter values,
7200                         or constants.VALUE_DEFAULT to reset the
7201                         parameter to its default value
7202     @type default_values: dict
7203     @param default_values: default values for the filled parameters
7204     @type parameter_types: dict
7205     @param parameter_types: dict mapping target dict keys to types
7206                             in constants.ENFORCEABLE_TYPES
7207     @rtype: (dict, dict)
7208     @return: (new_parameters, filled_parameters)
7209
7210     """
7211     params_copy = copy.deepcopy(old_params)
7212     for key, val in update_dict.iteritems():
7213       if val == constants.VALUE_DEFAULT:
7214         try:
7215           del params_copy[key]
7216         except KeyError:
7217           pass
7218       else:
7219         params_copy[key] = val
7220     utils.ForceDictType(params_copy, parameter_types)
7221     params_filled = objects.FillDict(default_values, params_copy)
7222     return (params_copy, params_filled)
7223
7224   def CheckPrereq(self):
7225     """Check prerequisites.
7226
7227     This only checks the instance list against the existing names.
7228
7229     """
7230     self.force = self.op.force
7231
7232     # checking the new params on the primary/secondary nodes
7233
7234     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7235     cluster = self.cluster = self.cfg.GetClusterInfo()
7236     assert self.instance is not None, \
7237       "Cannot retrieve locked instance %s" % self.op.instance_name
7238     pnode = instance.primary_node
7239     nodelist = list(instance.all_nodes)
7240
7241     # hvparams processing
7242     if self.op.hvparams:
7243       i_hvdict, hv_new = self._GetUpdatedParams(
7244                              instance.hvparams, self.op.hvparams,
7245                              cluster.hvparams[instance.hypervisor],
7246                              constants.HVS_PARAMETER_TYPES)
7247       # local check
7248       hypervisor.GetHypervisor(
7249         instance.hypervisor).CheckParameterSyntax(hv_new)
7250       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
7251       self.hv_new = hv_new # the new actual values
7252       self.hv_inst = i_hvdict # the new dict (without defaults)
7253     else:
7254       self.hv_new = self.hv_inst = {}
7255
7256     # beparams processing
7257     if self.op.beparams:
7258       i_bedict, be_new = self._GetUpdatedParams(
7259                              instance.beparams, self.op.beparams,
7260                              cluster.beparams[constants.PP_DEFAULT],
7261                              constants.BES_PARAMETER_TYPES)
7262       self.be_new = be_new # the new actual values
7263       self.be_inst = i_bedict # the new dict (without defaults)
7264     else:
7265       self.be_new = self.be_inst = {}
7266
7267     self.warn = []
7268
7269     if constants.BE_MEMORY in self.op.beparams and not self.force:
7270       mem_check_list = [pnode]
7271       if be_new[constants.BE_AUTO_BALANCE]:
7272         # either we changed auto_balance to yes or it was from before
7273         mem_check_list.extend(instance.secondary_nodes)
7274       instance_info = self.rpc.call_instance_info(pnode, instance.name,
7275                                                   instance.hypervisor)
7276       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
7277                                          instance.hypervisor)
7278       pninfo = nodeinfo[pnode]
7279       msg = pninfo.fail_msg
7280       if msg:
7281         # Assume the primary node is unreachable and go ahead
7282         self.warn.append("Can't get info from primary node %s: %s" %
7283                          (pnode,  msg))
7284       elif not isinstance(pninfo.payload.get('memory_free', None), int):
7285         self.warn.append("Node data from primary node %s doesn't contain"
7286                          " free memory information" % pnode)
7287       elif instance_info.fail_msg:
7288         self.warn.append("Can't get instance runtime information: %s" %
7289                         instance_info.fail_msg)
7290       else:
7291         if instance_info.payload:
7292           current_mem = int(instance_info.payload['memory'])
7293         else:
7294           # Assume instance not running
7295           # (there is a slight race condition here, but it's not very probable,
7296           # and we have no other way to check)
7297           current_mem = 0
7298         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
7299                     pninfo.payload['memory_free'])
7300         if miss_mem > 0:
7301           raise errors.OpPrereqError("This change will prevent the instance"
7302                                      " from starting, due to %d MB of memory"
7303                                      " missing on its primary node" % miss_mem)
7304
7305       if be_new[constants.BE_AUTO_BALANCE]:
7306         for node, nres in nodeinfo.items():
7307           if node not in instance.secondary_nodes:
7308             continue
7309           msg = nres.fail_msg
7310           if msg:
7311             self.warn.append("Can't get info from secondary node %s: %s" %
7312                              (node, msg))
7313           elif not isinstance(nres.payload.get('memory_free', None), int):
7314             self.warn.append("Secondary node %s didn't return free"
7315                              " memory information" % node)
7316           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
7317             self.warn.append("Not enough memory to failover instance to"
7318                              " secondary node %s" % node)
7319
7320     # NIC processing
7321     self.nic_pnew = {}
7322     self.nic_pinst = {}
7323     for nic_op, nic_dict in self.op.nics:
7324       if nic_op == constants.DDM_REMOVE:
7325         if not instance.nics:
7326           raise errors.OpPrereqError("Instance has no NICs, cannot remove")
7327         continue
7328       if nic_op != constants.DDM_ADD:
7329         # an existing nic
7330         if nic_op < 0 or nic_op >= len(instance.nics):
7331           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
7332                                      " are 0 to %d" %
7333                                      (nic_op, len(instance.nics)))
7334         old_nic_params = instance.nics[nic_op].nicparams
7335         old_nic_ip = instance.nics[nic_op].ip
7336       else:
7337         old_nic_params = {}
7338         old_nic_ip = None
7339
7340       update_params_dict = dict([(key, nic_dict[key])
7341                                  for key in constants.NICS_PARAMETERS
7342                                  if key in nic_dict])
7343
7344       if 'bridge' in nic_dict:
7345         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
7346
7347       new_nic_params, new_filled_nic_params = \
7348           self._GetUpdatedParams(old_nic_params, update_params_dict,
7349                                  cluster.nicparams[constants.PP_DEFAULT],
7350                                  constants.NICS_PARAMETER_TYPES)
7351       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
7352       self.nic_pinst[nic_op] = new_nic_params
7353       self.nic_pnew[nic_op] = new_filled_nic_params
7354       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
7355
7356       if new_nic_mode == constants.NIC_MODE_BRIDGED:
7357         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
7358         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
7359         if msg:
7360           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
7361           if self.force:
7362             self.warn.append(msg)
7363           else:
7364             raise errors.OpPrereqError(msg)
7365       if new_nic_mode == constants.NIC_MODE_ROUTED:
7366         if 'ip' in nic_dict:
7367           nic_ip = nic_dict['ip']
7368         else:
7369           nic_ip = old_nic_ip
7370         if nic_ip is None:
7371           raise errors.OpPrereqError('Cannot set the nic ip to None'
7372                                      ' on a routed nic')
7373       if 'mac' in nic_dict:
7374         nic_mac = nic_dict['mac']
7375         if nic_mac is None:
7376           raise errors.OpPrereqError('Cannot set the nic mac to None')
7377         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7378           # otherwise generate the mac
7379           nic_dict['mac'] = self.cfg.GenerateMAC()
7380         else:
7381           # or validate/reserve the current one
7382           if self.cfg.IsMacInUse(nic_mac):
7383             raise errors.OpPrereqError("MAC address %s already in use"
7384                                        " in cluster" % nic_mac)
7385
7386     # DISK processing
7387     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
7388       raise errors.OpPrereqError("Disk operations not supported for"
7389                                  " diskless instances")
7390     for disk_op, disk_dict in self.op.disks:
7391       if disk_op == constants.DDM_REMOVE:
7392         if len(instance.disks) == 1:
7393           raise errors.OpPrereqError("Cannot remove the last disk of"
7394                                      " an instance")
7395         ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
7396         ins_l = ins_l[pnode]
7397         msg = ins_l.fail_msg
7398         if msg:
7399           raise errors.OpPrereqError("Can't contact node %s: %s" %
7400                                      (pnode, msg))
7401         if instance.name in ins_l.payload:
7402           raise errors.OpPrereqError("Instance is running, can't remove"
7403                                      " disks.")
7404
7405       if (disk_op == constants.DDM_ADD and
7406           len(instance.nics) >= constants.MAX_DISKS):
7407         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
7408                                    " add more" % constants.MAX_DISKS)
7409       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
7410         # an existing disk
7411         if disk_op < 0 or disk_op >= len(instance.disks):
7412           raise errors.OpPrereqError("Invalid disk index %s, valid values"
7413                                      " are 0 to %d" %
7414                                      (disk_op, len(instance.disks)))
7415
7416     return
7417
7418   def Exec(self, feedback_fn):
7419     """Modifies an instance.
7420
7421     All parameters take effect only at the next restart of the instance.
7422
7423     """
7424     # Process here the warnings from CheckPrereq, as we don't have a
7425     # feedback_fn there.
7426     for warn in self.warn:
7427       feedback_fn("WARNING: %s" % warn)
7428
7429     result = []
7430     instance = self.instance
7431     cluster = self.cluster
7432     # disk changes
7433     for disk_op, disk_dict in self.op.disks:
7434       if disk_op == constants.DDM_REMOVE:
7435         # remove the last disk
7436         device = instance.disks.pop()
7437         device_idx = len(instance.disks)
7438         for node, disk in device.ComputeNodeTree(instance.primary_node):
7439           self.cfg.SetDiskID(disk, node)
7440           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
7441           if msg:
7442             self.LogWarning("Could not remove disk/%d on node %s: %s,"
7443                             " continuing anyway", device_idx, node, msg)
7444         result.append(("disk/%d" % device_idx, "remove"))
7445       elif disk_op == constants.DDM_ADD:
7446         # add a new disk
7447         if instance.disk_template == constants.DT_FILE:
7448           file_driver, file_path = instance.disks[0].logical_id
7449           file_path = os.path.dirname(file_path)
7450         else:
7451           file_driver = file_path = None
7452         disk_idx_base = len(instance.disks)
7453         new_disk = _GenerateDiskTemplate(self,
7454                                          instance.disk_template,
7455                                          instance.name, instance.primary_node,
7456                                          instance.secondary_nodes,
7457                                          [disk_dict],
7458                                          file_path,
7459                                          file_driver,
7460                                          disk_idx_base)[0]
7461         instance.disks.append(new_disk)
7462         info = _GetInstanceInfoText(instance)
7463
7464         logging.info("Creating volume %s for instance %s",
7465                      new_disk.iv_name, instance.name)
7466         # Note: this needs to be kept in sync with _CreateDisks
7467         #HARDCODE
7468         for node in instance.all_nodes:
7469           f_create = node == instance.primary_node
7470           try:
7471             _CreateBlockDev(self, node, instance, new_disk,
7472                             f_create, info, f_create)
7473           except errors.OpExecError, err:
7474             self.LogWarning("Failed to create volume %s (%s) on"
7475                             " node %s: %s",
7476                             new_disk.iv_name, new_disk, node, err)
7477         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
7478                        (new_disk.size, new_disk.mode)))
7479       else:
7480         # change a given disk
7481         instance.disks[disk_op].mode = disk_dict['mode']
7482         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
7483     # NIC changes
7484     for nic_op, nic_dict in self.op.nics:
7485       if nic_op == constants.DDM_REMOVE:
7486         # remove the last nic
7487         del instance.nics[-1]
7488         result.append(("nic.%d" % len(instance.nics), "remove"))
7489       elif nic_op == constants.DDM_ADD:
7490         # mac and bridge should be set, by now
7491         mac = nic_dict['mac']
7492         ip = nic_dict.get('ip', None)
7493         nicparams = self.nic_pinst[constants.DDM_ADD]
7494         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
7495         instance.nics.append(new_nic)
7496         result.append(("nic.%d" % (len(instance.nics) - 1),
7497                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
7498                        (new_nic.mac, new_nic.ip,
7499                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
7500                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
7501                        )))
7502       else:
7503         for key in 'mac', 'ip':
7504           if key in nic_dict:
7505             setattr(instance.nics[nic_op], key, nic_dict[key])
7506         if nic_op in self.nic_pnew:
7507           instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
7508         for key, val in nic_dict.iteritems():
7509           result.append(("nic.%s/%d" % (key, nic_op), val))
7510
7511     # hvparams changes
7512     if self.op.hvparams:
7513       instance.hvparams = self.hv_inst
7514       for key, val in self.op.hvparams.iteritems():
7515         result.append(("hv/%s" % key, val))
7516
7517     # beparams changes
7518     if self.op.beparams:
7519       instance.beparams = self.be_inst
7520       for key, val in self.op.beparams.iteritems():
7521         result.append(("be/%s" % key, val))
7522
7523     self.cfg.Update(instance)
7524
7525     return result
7526
7527
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    """Declare shared node locks, on the given nodes or on all of them.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted_nodes = _GetWantedNodes(self, self.op.nodes)
    else:
      # an empty node list means "all nodes"
      wanted_nodes = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This only records the nodes whose locks were acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node, or False if the node could not be queried.

    """
    exports = {}
    for node, nres in self.rpc.call_export_list(self.nodes).items():
      if nres.fail_msg:
        exports[node] = False
      else:
        exports[node] = nres.payload

    return exports
7568
7569
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  The disks of the instance are snapshotted on its primary node, the
  snapshots are exported to the target node and then removed; finally
  the export is finalized on the target node and exports of the same
  instance found on the other nodes are removed.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and all the nodes.

    """
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    @return: tuple of (environment dict, node list, node list); the
        same node list is returned twice

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid, and that
    the instance has no file-based disks. It sets self.instance and
    self.dst_node for use in Exec.

    @raise errors.OpPrereqError: if the target node is unknown or the
        instance has a file-based disk

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.dst_node = self.cfg.GetNodeInfo(
      self.cfg.ExpandNodeName(self.op.target_node))

    if self.dst_node is None:
      # This is wrong node name, not a non-locked node
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks")

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    @param feedback_fn: function used to report progress messages
    @rtype: tuple
    @return: (fin_resu, dresults), where fin_resu is False if finalizing
        the export failed and True otherwise, and dresults is a list
        with one boolean per disk, False meaning that the snapshot or
        the export of that disk failed
    @raise errors.OpExecError: if the instance cannot be started again
        after the snapshots were taken

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    # one entry per instance disk: the snapshot device, or False if the
    # snapshot failed
    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    # per-disk results
    dresults = []
    try:
      for idx, disk in enumerate(instance.disks):
        feedback_fn("Creating a snapshot of disk/%s on node %s" %
                    (idx, src_node))

        # result.payload will be a snapshot of an lvm leaf of the one we passed
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                          idx, src_node, msg)
          snap_disks.append(False)
        else:
          disk_id = (vgname, result.payload)
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                 logical_id=disk_id, physical_id=disk_id,
                                 iv_name=disk.iv_name)
          snap_disks.append(new_dev)

    finally:
      # the instance is started again even if taking the snapshots raised
      # an error, but only if we stopped it and it is marked as up
      if self.op.shutdown and instance.admin_up:
        feedback_fn("Starting instance %s" % instance.name)
        result = self.rpc.call_instance_start(src_node, instance, None, None)
        msg = result.fail_msg
        if msg:
          _ShutdownInstanceDisks(self, instance)
          raise errors.OpExecError("Could not start instance: %s" % msg)

    # TODO: check for size

    cluster_name = self.cfg.GetClusterName()
    for idx, dev in enumerate(snap_disks):
      feedback_fn("Exporting snapshot %s from %s to %s" %
                  (idx, src_node, dst_node.name))
      if dev:
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                               instance, cluster_name, idx)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not export disk/%s from node %s to"
                          " node %s: %s", idx, src_node, dst_node.name, msg)
          dresults.append(False)
        else:
          dresults.append(True)
        # the snapshot is removed whether or not the export succeeded
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
        if msg:
          self.LogWarning("Could not remove snapshot for disk/%d from node"
                          " %s: %s", idx, src_node, msg)
      else:
        # no snapshot was taken for this disk
        dresults.append(False)

    feedback_fn("Finalizing export on %s" % dst_node.name)
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
    fin_resu = True
    msg = result.fail_msg
    if msg:
      self.LogWarning("Could not finalize export for instance %s"
                      " on node %s: %s", instance.name, dst_node.name, msg)
      fin_resu = False

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # On one-node clusters nodelist will be empty after the removal; if we
    # proceeded, the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          # nodes which cannot be queried are silently skipped
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
    return fin_resu, dresults
7742
7743
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks on all nodes.

    """
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Remove any export.

    All the locked nodes are queried, and the export of the instance is
    removed from every node which has one.

    @param feedback_fn: function used to tell the user that no export
        was found for a name which is not a known instance

    """
    given_name = self.op.instance_name
    instance_name = self.cfg.ExpandInstanceName(given_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = not instance_name
    if fqdn_warn:
      instance_name = given_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    found = False
    for node, nres in self.rpc.call_export_list(locked_nodes).items():
      query_msg = nres.fail_msg
      if query_msg:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_msg)
        continue
      if instance_name not in nres.payload:
        continue
      found = True
      remove_msg = self.rpc.call_export_remove(node, instance_name).fail_msg
      if remove_msg:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node, remove_msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
7795
7796
class TagsLU(NoHooksLU):
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.
  It takes care of locking and looking up the object the tags belong to.

  """

  def ExpandNames(self):
    """Expand the object name and lock the object, for nodes and instances.

    """
    self.needed_locks = {}
    kind = self.op.kind
    if kind == constants.TAG_NODE:
      full_name = self.cfg.ExpandNodeName(self.op.name)
      if full_name is None:
        raise errors.OpPrereqError("Invalid node name (%s)" %
                                   (self.op.name,))
      lock_level = locking.LEVEL_NODE
    elif kind == constants.TAG_INSTANCE:
      full_name = self.cfg.ExpandInstanceName(self.op.name)
      if full_name is None:
        raise errors.OpPrereqError("Invalid instance name (%s)" %
                                   (self.op.name,))
      lock_level = locking.LEVEL_INSTANCE
    else:
      # no name to expand and no lock to declare for the other kinds
      return
    self.op.name = full_name
    self.needed_locks[lock_level] = full_name

  def CheckPrereq(self):
    """Check prerequisites.

    This looks up the object to work on and saves it in self.target.

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))
    self.target = target
7834
7835
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @rtype: list
    @return: the tags of the target object

    """
    current_tags = self.target.GetTags()
    return list(current_tags)
7848
7849
7850 class LUSearchTags(NoHooksLU):
7851   """Searches the tags for a given pattern.
7852
7853   """
7854   _OP_REQP = ["pattern"]
7855   REQ_BGL = False
7856
7857   def ExpandNames(self):
7858     self.needed_locks = {}
7859
7860   def CheckPrereq(self):
7861     """Check prerequisites.
7862
7863     This checks the pattern passed for validity by compiling it.
7864
7865     """
7866     try:
7867       self.re = re.compile(self.op.pattern)
7868     except re.error, err:
7869       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
7870                                  (self.op.pattern, err))
7871
7872   def Exec(self, feedback_fn):
7873     """Returns the tag list.
7874
7875     """
7876     cfg = self.cfg
7877     tgts = [("/cluster", cfg.GetClusterInfo())]
7878     ilist = cfg.GetAllInstancesInfo().values()
7879     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
7880     nlist = cfg.GetAllNodesInfo().values()
7881     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
7882     results = []
7883     for path, target in tgts:
7884       for tag in target.GetTags():
7885         if self.re.search(tag):
7886           results.append((path, tag))
7887     return results
7888
7889
7890 class LUAddTags(TagsLU):
7891   """Sets a tag on a given object.
7892
7893   """
7894   _OP_REQP = ["kind", "name", "tags"]
7895   REQ_BGL = False
7896
7897   def CheckPrereq(self):
7898     """Check prerequisites.
7899
7900     This checks the type and length of the tag name and value.
7901
7902     """
7903     TagsLU.CheckPrereq(self)
7904     for tag in self.op.tags:
7905       objects.TaggableObject.ValidateTag(tag)
7906
7907   def Exec(self, feedback_fn):
7908     """Sets the tag.
7909
7910     """
7911     try:
7912       for tag in self.op.tags:
7913         self.target.AddTag(tag)
7914     except errors.TagError, err:
7915       raise errors.OpExecError("Error while setting tag: %s" % str(err))
7916     try:
7917       self.cfg.Update(self.target)
7918     except errors.ConfigurationError:
7919       raise errors.OpRetryError("There has been a modification to the"
7920                                 " config file and the operation has been"
7921                                 " aborted. Please retry.")
7922
7923
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This validates the tags and checks that the target object has all
    of them.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    # the tags which were requested but are not set on the object
    missing_tags = frozenset(self.op.tags) - self.target.GetTags()
    if missing_tags:
      quoted = ["'%s'" % tag for tag in missing_tags]
      quoted.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(quoted)))

  def Exec(self, feedback_fn):
    """Remove the tags from the object and save it.

    """
    target = self.target
    for tag in self.op.tags:
      target.RemoveTag(tag)
    try:
      self.cfg.Update(target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")
7961
7962
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any, and locks those nodes.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return
    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    node_names = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = node_names
    self.needed_locks[locking.LEVEL_NODE] = node_names

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    The delay is run first on the master (if requested) and then on
    the given nodes (if any).

    """
    if self.op.on_master and not utils.TestDelay(self.op.duration):
      raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      node_results = self.rpc.call_test_delay(self.op.on_nodes,
                                              self.op.duration)
      for node, nres in node_results.items():
        nres.Raise("Failure during rpc call to node %s" % node)
8003
8004
8005 class IAllocator(object):
8006   """IAllocator framework.
8007
8008   An IAllocator instance has three sets of attributes:
8009     - cfg that is needed to query the cluster
8010     - input data (all members of the _KEYS class attribute are required)
8011     - four buffer attributes (in|out_data|text), that represent the
8012       input (to the external script) in text and data structure format,
8013       and the output from it, again in two formats
8014     - the result variables from the script (success, info, nodes) for
8015       easy usage
8016
8017   """
8018   _ALLO_KEYS = [
8019     "mem_size", "disks", "disk_template",
8020     "os", "tags", "nics", "vcpus", "hypervisor",
8021     ]
8022   _RELO_KEYS = [
8023     "relocate_from",
8024     ]
8025
8026   def __init__(self, cfg, rpc, mode, name, **kwargs):
8027     self.cfg = cfg
8028     self.rpc = rpc
8029     # init buffer variables
8030     self.in_text = self.out_text = self.in_data = self.out_data = None
8031     # init all input fields so that pylint is happy
8032     self.mode = mode
8033     self.name = name
8034     self.mem_size = self.disks = self.disk_template = None
8035     self.os = self.tags = self.nics = self.vcpus = None
8036     self.hypervisor = None
8037     self.relocate_from = None
8038     # computed fields
8039     self.required_nodes = None
8040     # init result fields
8041     self.success = self.info = self.nodes = None
8042     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8043       keyset = self._ALLO_KEYS
8044     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8045       keyset = self._RELO_KEYS
8046     else:
8047       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8048                                    " IAllocator" % self.mode)
8049     for key in kwargs:
8050       if key not in keyset:
8051         raise errors.ProgrammerError("Invalid input parameter '%s' to"
8052                                      " IAllocator" % key)
8053       setattr(self, key, kwargs[key])
8054     for key in keyset:
8055       if key not in kwargs:
8056         raise errors.ProgrammerError("Missing input parameter '%s' to"
8057                                      " IAllocator" % key)
8058     self._BuildInputData()
8059
8060   def _ComputeClusterData(self):
8061     """Compute the generic allocator input data.
8062
8063     This is the data that is independent of the actual operation.
8064
8065     """
8066     cfg = self.cfg
8067     cluster_info = cfg.GetClusterInfo()
8068     # cluster data
8069     data = {
8070       "version": constants.IALLOCATOR_VERSION,
8071       "cluster_name": cfg.GetClusterName(),
8072       "cluster_tags": list(cluster_info.GetTags()),
8073       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8074       # we don't have job IDs
8075       }
8076     iinfo = cfg.GetAllInstancesInfo().values()
8077     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8078
8079     # node data
8080     node_results = {}
8081     node_list = cfg.GetNodeList()
8082
8083     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8084       hypervisor_name = self.hypervisor
8085     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8086       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
8087
8088     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
8089                                         hypervisor_name)
8090     node_iinfo = \
8091       self.rpc.call_all_instances_info(node_list,
8092                                        cluster_info.enabled_hypervisors)
8093     for nname, nresult in node_data.items():
8094       # first fill in static (config-based) values
8095       ninfo = cfg.GetNodeInfo(nname)
8096       pnr = {
8097         "tags": list(ninfo.GetTags()),
8098         "primary_ip": ninfo.primary_ip,
8099         "secondary_ip": ninfo.secondary_ip,
8100         "offline": ninfo.offline,
8101         "drained": ninfo.drained,
8102         "master_candidate": ninfo.master_candidate,
8103         }
8104
8105       if not (ninfo.offline or ninfo.drained):
8106         nresult.Raise("Can't get data for node %s" % nname)
8107         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
8108                                 nname)
8109         remote_info = nresult.payload
8110
8111         for attr in ['memory_total', 'memory_free', 'memory_dom0',
8112                      'vg_size', 'vg_free', 'cpu_total']:
8113           if attr not in remote_info:
8114             raise errors.OpExecError("Node '%s' didn't return attribute"
8115                                      " '%s'" % (nname, attr))
8116           if not isinstance(remote_info[attr], int):
8117             raise errors.OpExecError("Node '%s' returned invalid value"
8118                                      " for '%s': %s" %
8119                                      (nname, attr, remote_info[attr]))
8120         # compute memory used by primary instances
8121         i_p_mem = i_p_up_mem = 0
8122         for iinfo, beinfo in i_list:
8123           if iinfo.primary_node == nname:
8124             i_p_mem += beinfo[constants.BE_MEMORY]
8125             if iinfo.name not in node_iinfo[nname].payload:
8126               i_used_mem = 0
8127             else:
8128               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
8129             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
8130             remote_info['memory_free'] -= max(0, i_mem_diff)
8131
8132             if iinfo.admin_up:
8133               i_p_up_mem += beinfo[constants.BE_MEMORY]
8134
8135         # compute memory used by instances
8136         pnr_dyn = {
8137           "total_memory": remote_info['memory_total'],
8138           "reserved_memory": remote_info['memory_dom0'],
8139           "free_memory": remote_info['memory_free'],
8140           "total_disk": remote_info['vg_size'],
8141           "free_disk": remote_info['vg_free'],
8142           "total_cpus": remote_info['cpu_total'],
8143           "i_pri_memory": i_p_mem,
8144           "i_pri_up_memory": i_p_up_mem,
8145           }
8146         pnr.update(pnr_dyn)
8147
8148       node_results[nname] = pnr
8149     data["nodes"] = node_results
8150
8151     # instance data
8152     instance_data = {}
8153     for iinfo, beinfo in i_list:
8154       nic_data = []
8155       for nic in iinfo.nics:
8156         filled_params = objects.FillDict(
8157             cluster_info.nicparams[constants.PP_DEFAULT],
8158             nic.nicparams)
8159         nic_dict = {"mac": nic.mac,
8160                     "ip": nic.ip,
8161                     "mode": filled_params[constants.NIC_MODE],
8162                     "link": filled_params[constants.NIC_LINK],
8163                    }
8164         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
8165           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
8166         nic_data.append(nic_dict)
8167       pir = {
8168         "tags": list(iinfo.GetTags()),
8169         "admin_up": iinfo.admin_up,
8170         "vcpus": beinfo[constants.BE_VCPUS],
8171         "memory": beinfo[constants.BE_MEMORY],
8172         "os": iinfo.os,
8173         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
8174         "nics": nic_data,
8175         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
8176         "disk_template": iinfo.disk_template,
8177         "hypervisor": iinfo.hypervisor,
8178         }
8179       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
8180                                                  pir["disks"])
8181       instance_data[iinfo.name] = pir
8182
8183     data["instances"] = instance_data
8184
8185     self.in_data = data
8186
8187   def _AddNewInstance(self):
8188     """Add new instance data to allocator structure.
8189
8190     This in combination with _AllocatorGetClusterData will create the
8191     correct structure needed as input for the allocator.
8192
8193     The checks for the completeness of the opcode must have already been
8194     done.
8195
8196     """
8197     data = self.in_data
8198
8199     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
8200
8201     if self.disk_template in constants.DTS_NET_MIRROR:
8202       self.required_nodes = 2
8203     else:
8204       self.required_nodes = 1
8205     request = {
8206       "type": "allocate",
8207       "name": self.name,
8208       "disk_template": self.disk_template,
8209       "tags": self.tags,
8210       "os": self.os,
8211       "vcpus": self.vcpus,
8212       "memory": self.mem_size,
8213       "disks": self.disks,
8214       "disk_space_total": disk_space,
8215       "nics": self.nics,
8216       "required_nodes": self.required_nodes,
8217       }
8218     data["request"] = request
8219
8220   def _AddRelocateInstance(self):
8221     """Add relocate instance data to allocator structure.
8222
8223     This in combination with _IAllocatorGetClusterData will create the
8224     correct structure needed as input for the allocator.
8225
8226     The checks for the completeness of the opcode must have already been
8227     done.
8228
8229     """
8230     instance = self.cfg.GetInstanceInfo(self.name)
8231     if instance is None:
8232       raise errors.ProgrammerError("Unknown instance '%s' passed to"
8233                                    " IAllocator" % self.name)
8234
8235     if instance.disk_template not in constants.DTS_NET_MIRROR:
8236       raise errors.OpPrereqError("Can't relocate non-mirrored instances")
8237
8238     if len(instance.secondary_nodes) != 1:
8239       raise errors.OpPrereqError("Instance has not exactly one secondary node")
8240
8241     self.required_nodes = 1
8242     disk_sizes = [{'size': disk.size} for disk in instance.disks]
8243     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
8244
8245     request = {
8246       "type": "relocate",
8247       "name": self.name,
8248       "disk_space_total": disk_space,
8249       "required_nodes": self.required_nodes,
8250       "relocate_from": self.relocate_from,
8251       }
8252     self.in_data["request"] = request
8253
8254   def _BuildInputData(self):
8255     """Build input data structures.
8256
8257     """
8258     self._ComputeClusterData()
8259
8260     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8261       self._AddNewInstance()
8262     else:
8263       self._AddRelocateInstance()
8264
8265     self.in_text = serializer.Dump(self.in_data)
8266
8267   def Run(self, name, validate=True, call_fn=None):
8268     """Run an instance allocator and return the results.
8269
8270     """
8271     if call_fn is None:
8272       call_fn = self.rpc.call_iallocator_runner
8273
8274     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
8275     result.Raise("Failure while running the iallocator script")
8276
8277     self.out_text = result.payload
8278     if validate:
8279       self._ValidateResult()
8280
8281   def _ValidateResult(self):
8282     """Process the allocator results.
8283
8284     This will process and if successful save the result in
8285     self.out_data and the other parameters.
8286
8287     """
8288     try:
8289       rdict = serializer.Load(self.out_text)
8290     except Exception, err:
8291       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
8292
8293     if not isinstance(rdict, dict):
8294       raise errors.OpExecError("Can't parse iallocator results: not a dict")
8295
8296     for key in "success", "info", "nodes":
8297       if key not in rdict:
8298         raise errors.OpExecError("Can't parse iallocator results:"
8299                                  " missing key '%s'" % key)
8300       setattr(self, key, rdict[key])
8301
8302     if not isinstance(rdict["nodes"], list):
8303       raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
8304                                " is not a list")
8305     self.out_data = rdict
8306
8307
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  Depending on the requested direction, this LU returns either the
  input text built for the allocator or the raw output of an allocator
  run.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def _CheckAllocOpcode(self):
    """Validate the opcode for an allocation test.

    A missing or None hypervisor is replaced by the hypervisor type
    returned by the configuration.

    """
    op = self.op
    required = ["name", "mem_size", "disks", "disk_template",
                "os", "tags", "nics", "vcpus"]
    for attr_name in required:
      if not hasattr(op, attr_name):
        raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                   attr_name)

    existing = self.cfg.ExpandInstanceName(op.name)
    if existing is not None:
      raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                 existing)

    if not isinstance(op.nics, list):
      raise errors.OpPrereqError("Invalid parameter 'nics'")
    for nic in op.nics:
      nic_ok = (isinstance(nic, dict) and
                "mac" in nic and
                "ip" in nic and
                "bridge" in nic)
      if not nic_ok:
        raise errors.OpPrereqError("Invalid contents of the"
                                   " 'nics' parameter")

    if not isinstance(op.disks, list):
      raise errors.OpPrereqError("Invalid parameter 'disks'")
    for disk in op.disks:
      disk_ok = (isinstance(disk, dict) and
                 "size" in disk and
                 isinstance(disk["size"], int) and
                 "mode" in disk and
                 disk["mode"] in ['r', 'w'])
      if not disk_ok:
        raise errors.OpPrereqError("Invalid contents of the"
                                   " 'disks' parameter")

    if getattr(op, "hypervisor", None) is None:
      op.hypervisor = self.cfg.GetHypervisorType()

  def _CheckRelocOpcode(self):
    """Validate the opcode for a relocation test.

    The instance name is expanded in place and the secondary nodes of
    the instance are remembered in self.relocate_from.

    """
    if not hasattr(self.op, "name"):
      raise errors.OpPrereqError("Missing attribute 'name' on opcode input")

    full_name = self.cfg.ExpandInstanceName(self.op.name)
    if full_name is None:
      raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                 self.op.name)
    self.op.name = full_name
    self.relocate_from = self.cfg.GetInstanceInfo(full_name).secondary_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and
    mode of the test.

    """
    mode = self.op.mode
    if mode == constants.IALLOCATOR_MODE_ALLOC:
      self._CheckAllocOpcode()
    elif mode == constants.IALLOCATOR_MODE_RELOC:
      self._CheckRelocOpcode()
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    direction = self.op.direction
    if direction == constants.IALLOCATOR_DIR_OUT:
      # an allocator name is only needed when the script is really run
      if getattr(self.op, "allocator", None) is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    Returns the allocator input text for the 'in' direction; otherwise
    runs the allocator without validating its output and returns the
    raw output text.

    """
    op = self.op
    if op.mode == constants.IALLOCATOR_MODE_ALLOC:
      mode_args = {
        "mem_size": op.mem_size,
        "disks": op.disks,
        "disk_template": op.disk_template,
        "os": op.os,
        "tags": op.tags,
        "nics": op.nics,
        "vcpus": op.vcpus,
        "hypervisor": op.hypervisor,
        }
    else:
      mode_args = {
        "relocate_from": list(self.relocate_from),
        }

    ial = IAllocator(self.cfg, self.rpc, mode=op.mode, name=op.name,
                     **mode_args)

    if op.direction == constants.IALLOCATOR_DIR_IN:
      return ial.in_text

    ial.Run(op.allocator, validate=False)
    return ial.out_text