code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0613,W0201
  25
  26 import os
  27 import os.path
  28 import time
  29 import re
  30 import platform
  31 import logging
  32 import copy
  33
  34 from ganeti import ssh
  35 from ganeti import utils
  36 from ganeti import errors
  37 from ganeti import hypervisor
  38 from ganeti import locking
  39 from ganeti import constants
  40 from ganeti import objects
  41 from ganeti import serializer
  42 from ganeti import ssconf
  43
  44
  45 class LogicalUnit(object):
  46   """Logical Unit base class.
  47
  48   Subclasses must follow these rules:
  49     - implement ExpandNames
  50     - implement CheckPrereq (except when tasklets are used)
  51     - implement Exec (except when tasklets are used)
  52     - implement BuildHooksEnv
  53     - redefine HPATH and HTYPE
  54     - optionally redefine their run requirements:
  55         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  56
  57   Note that all commands require root permissions.
  58
  59   @ivar dry_run_result: the value (if any) that will be returned to the caller
  60       in dry-run mode (signalled by opcode dry_run parameter)
  61
  62   """
  63   HPATH = None
  64   HTYPE = None
  65   _OP_REQP = []
  66   REQ_BGL = True
  67
  68   def __init__(self, processor, op, context, rpc):
  69     """Constructor for LogicalUnit.
  70
  71     This needs to be overridden in derived classes in order to check op
  72     validity.
  73
  74     """
  75     self.proc = processor
  76     self.op = op
  77     self.cfg = context.cfg
  78     self.context = context
  79     self.rpc = rpc
  80     # Dicts used to declare locking needs to mcpu
  81     self.needed_locks = None
  82     self.acquired_locks = {}
  83     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
  84     self.add_locks = {}
  85     self.remove_locks = {}
  86     # Used to force good behavior when calling helper functions
  87     self.recalculate_locks = {}
  88     self.__ssh = None
  89     # logging
  90     self.LogWarning = processor.LogWarning
  91     self.LogInfo = processor.LogInfo
  92     self.LogStep = processor.LogStep
  93     # support for dry-run
  94     self.dry_run_result = None
  95
  96     # Tasklets
  97     self.tasklets = None
  98
  99     for attr_name in self._OP_REQP:
 100       attr_val = getattr(op, attr_name, None)
 101       if attr_val is None:
 102         raise errors.OpPrereqError("Required parameter '%s' missing" %
 103                                    attr_name)
 104
 105     self.CheckArguments()
 106
 107   def __GetSSH(self):
 108     """Returns the SshRunner object
 109
 110     """
 111     if not self.__ssh:
 112       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 113     return self.__ssh
 114
 115   ssh = property(fget=__GetSSH)
 116
 117   def CheckArguments(self):
 118     """Check syntactic validity for the opcode arguments.
 119
 120     This method is for doing a simple syntactic check and ensure
 121     validity of opcode parameters, without any cluster-related
 122     checks. While the same can be accomplished in ExpandNames and/or
 123     CheckPrereq, doing these separate is better because:
 124
 125       - ExpandNames is left as as purely a lock-related function
 126       - CheckPrereq is run after we have acquired locks (and possible
 127         waited for them)
 128
 129     The function is allowed to change the self.op attribute so that
 130     later methods can no longer worry about missing parameters.
 131
 132     """
 133     pass
 134
 135   def ExpandNames(self):
 136     """Expand names for this LU.
 137
 138     This method is called before starting to execute the opcode, and it should
 139     update all the parameters of the opcode to their canonical form (e.g. a
 140     short node name must be fully expanded after this method has successfully
 141     completed). This way locking, hooks, logging, ecc. can work correctly.
 142
 143     LUs which implement this method must also populate the self.needed_locks
 144     member, as a dict with lock levels as keys, and a list of needed lock names
 145     as values. Rules:
 146
 147       - use an empty dict if you don't need any lock
 148       - if you don't need any lock at a particular level omit that level
 149       - don't put anything for the BGL level
 150       - if you want all locks at a level use locking.ALL_SET as a value
 151
 152     If you need to share locks (rather than acquire them exclusively) at one
 153     level you can modify self.share_locks, setting a true value (usually 1) for
 154     that level. By default locks are not shared.
 155
 156     This function can also define a list of tasklets, which then will be
 157     executed in order instead of the usual LU-level CheckPrereq and Exec
 158     functions, if those are not defined by the LU.
 159
 160     Examples::
 161
 162       # Acquire all nodes and one instance
 163       self.needed_locks = {
 164         locking.LEVEL_NODE: locking.ALL_SET,
 165         locking.LEVEL_INSTANCE: ['instance1.example.tld'],
 166       }
 167       # Acquire just two nodes
 168       self.needed_locks = {
 169         locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
 170       }
 171       # Acquire no locks
 172       self.needed_locks = {} # No, you can't leave it to the default value None
 173
 174     """
 175     # The implementation of this method is mandatory only if the new LU is
 176     # concurrent, so that old LUs don't need to be changed all at the same
 177     # time.
 178     if self.REQ_BGL:
 179       self.needed_locks = {} # Exclusive LUs don't need locks.
 180     else:
 181       raise NotImplementedError
 182
 183   def DeclareLocks(self, level):
 184     """Declare LU locking needs for a level
 185
 186     While most LUs can just declare their locking needs at ExpandNames time,
 187     sometimes there's the need to calculate some locks after having acquired
 188     the ones before. This function is called just before acquiring locks at a
 189     particular level, but after acquiring the ones at lower levels, and permits
 190     such calculations. It can be used to modify self.needed_locks, and by
 191     default it does nothing.
 192
 193     This function is only called if you have something already set in
 194     self.needed_locks for the level.
 195
 196     @param level: Locking level which is going to be locked
 197     @type level: member of ganeti.locking.LEVELS
 198
 199     """
 200
 201   def CheckPrereq(self):
 202     """Check prerequisites for this LU.
 203
 204     This method should check that the prerequisites for the execution
 205     of this LU are fulfilled. It can do internode communication, but
 206     it should be idempotent - no cluster or system changes are
 207     allowed.
 208
 209     The method should raise errors.OpPrereqError in case something is
 210     not fulfilled. Its return value is ignored.
 211
 212     This method should also update all the parameters of the opcode to
 213     their canonical form if it hasn't been done by ExpandNames before.
 214
 215     """
 216     if self.tasklets is not None:
 217       for (idx, tl) in enumerate(self.tasklets):
 218         logging.debug("Checking prerequisites for tasklet %s/%s",
 219                       idx + 1, len(self.tasklets))
 220         tl.CheckPrereq()
 221     else:
 222       raise NotImplementedError
 223
 224   def Exec(self, feedback_fn):
 225     """Execute the LU.
 226
 227     This method should implement the actual work. It should raise
 228     errors.OpExecError for failures that are somewhat dealt with in
 229     code, or expected.
 230
 231     """
 232     if self.tasklets is not None:
 233       for (idx, tl) in enumerate(self.tasklets):
 234         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 235         tl.Exec(feedback_fn)
 236     else:
 237       raise NotImplementedError
 238
 239   def BuildHooksEnv(self):
 240     """Build hooks environment for this LU.
 241
 242     This method should return a three-node tuple consisting of: a dict
 243     containing the environment that will be used for running the
 244     specific hook for this LU, a list of node names on which the hook
 245     should run before the execution, and a list of node names on which
 246     the hook should run after the execution.
 247
 248     The keys of the dict must not have 'GANETI_' prefixed as this will
 249     be handled in the hooks runner. Also note additional keys will be
 250     added by the hooks runner. If the LU doesn't define any
 251     environment, an empty dict (and not None) should be returned.
 252
 253     No nodes should be returned as an empty list (and not None).
 254
 255     Note that if the HPATH for a LU class is None, this function will
 256     not be called.
 257
 258     """
 259     raise NotImplementedError
 260
 261   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 262     """Notify the LU about the results of its hooks.
 263
 264     This method is called every time a hooks phase is executed, and notifies
 265     the Logical Unit about the hooks' result. The LU can then use it to alter
 266     its result based on the hooks.  By default the method does nothing and the
 267     previous result is passed back unchanged but any LU can define it if it
 268     wants to use the local cluster hook-scripts somehow.
 269
 270     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 271         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 272     @param hook_results: the results of the multi-node hooks rpc call
 273     @param feedback_fn: function used send feedback back to the caller
 274     @param lu_result: the previous Exec result this LU had, or None
 275         in the PRE phase
 276     @return: the new Exec result, based on the previous result
 277         and hook results
 278
 279     """
 280     return lu_result
 281
 282   def _ExpandAndLockInstance(self):
 283     """Helper function to expand and lock an instance.
 284
 285     Many LUs that work on an instance take its name in self.op.instance_name
 286     and need to expand it and then declare the expanded name for locking. This
 287     function does it, and then updates self.op.instance_name to the expanded
 288     name. It also initializes needed_locks as a dict, if this hasn't been done
 289     before.
 290
 291     """
 292     if self.needed_locks is None:
 293       self.needed_locks = {}
 294     else:
 295       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 296         "_ExpandAndLockInstance called with instance-level locks set"
 297     expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
 298     if expanded_name is None:
 299       raise errors.OpPrereqError("Instance '%s' not known" %
 300                                   self.op.instance_name)
 301     self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
 302     self.op.instance_name = expanded_name
 303
 304   def _LockInstancesNodes(self, primary_only=False):
 305     """Helper function to declare instances' nodes for locking.
 306
 307     This function should be called after locking one or more instances to lock
 308     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 309     with all primary or secondary nodes for instances already locked and
 310     present in self.needed_locks[locking.LEVEL_INSTANCE].
 311
 312     It should be called from DeclareLocks, and for safety only works if
 313     self.recalculate_locks[locking.LEVEL_NODE] is set.
 314
 315     In the future it may grow parameters to just lock some instance's nodes, or
 316     to just lock primaries or secondary nodes, if needed.
 317
 318     If should be called in DeclareLocks in a way similar to::
 319
 320       if level == locking.LEVEL_NODE:
 321         self._LockInstancesNodes()
 322
 323     @type primary_only: boolean
 324     @param primary_only: only lock primary nodes of locked instances
 325
 326     """
 327     assert locking.LEVEL_NODE in self.recalculate_locks, \
 328       "_LockInstancesNodes helper function called with no nodes to recalculate"
 329
 330     # TODO: check if we're really been called with the instance locks held
 331
 332     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 333     # future we might want to have different behaviors depending on the value
 334     # of self.recalculate_locks[locking.LEVEL_NODE]
 335     wanted_nodes = []
 336     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 337       instance = self.context.cfg.GetInstanceInfo(instance_name)
 338       wanted_nodes.append(instance.primary_node)
 339       if not primary_only:
 340         wanted_nodes.extend(instance.secondary_nodes)
 341
 342     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 343       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 344     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 345       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 346
 347     del self.recalculate_locks[locking.LEVEL_NODE]
 348
 349
 350 class NoHooksLU(LogicalUnit):
 351   """Simple LU which runs no hooks.
 352
 353   This LU is intended as a parent for other LogicalUnits which will
 354   run no hooks, in order to reduce duplicate code.
 355
 356   """
 357   HPATH = None
 358   HTYPE = None
 359
 360
 361 class Tasklet:
 362   """Tasklet base class.
 363
 364   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 365   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 366   tasklets know nothing about locks.
 367
 368   Subclasses must follow these rules:
 369     - Implement CheckPrereq
 370     - Implement Exec
 371
 372   """
 373   def __init__(self, lu):
 374     self.lu = lu
 375
 376     # Shortcuts
 377     self.cfg = lu.cfg
 378     self.rpc = lu.rpc
 379
 380   def CheckPrereq(self):
 381     """Check prerequisites for this tasklets.
 382
 383     This method should check whether the prerequisites for the execution of
 384     this tasklet are fulfilled. It can do internode communication, but it
 385     should be idempotent - no cluster or system changes are allowed.
 386
 387     The method should raise errors.OpPrereqError in case something is not
 388     fulfilled. Its return value is ignored.
 389
 390     This method should also update all parameters to their canonical form if it
 391     hasn't been done before.
 392
 393     """
 394     raise NotImplementedError
 395
 396   def Exec(self, feedback_fn):
 397     """Execute the tasklet.
 398
 399     This method should implement the actual work. It should raise
 400     errors.OpExecError for failures that are somewhat dealt with in code, or
 401     expected.
 402
 403     """
 404     raise NotImplementedError
 405
 406
 407 def _GetWantedNodes(lu, nodes):
 408   """Returns list of checked and expanded node names.
 409
 410   @type lu: L{LogicalUnit}
 411   @param lu: the logical unit on whose behalf we execute
 412   @type nodes: list
 413   @param nodes: list of node names or None for all nodes
 414   @rtype: list
 415   @return: the list of nodes, sorted
 416   @raise errors.OpProgrammerError: if the nodes parameter is wrong type
 417
 418   """
 419   if not isinstance(nodes, list):
 420     raise errors.OpPrereqError("Invalid argument type 'nodes'")
 421
 422   if not nodes:
 423     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 424       " non-empty list of nodes whose name is to be expanded.")
 425
 426   wanted = []
 427   for name in nodes:
 428     node = lu.cfg.ExpandNodeName(name)
 429     if node is None:
 430       raise errors.OpPrereqError("No such node name '%s'" % name)
 431     wanted.append(node)
 432
 433   return utils.NiceSort(wanted)
 434
 435
 436 def _GetWantedInstances(lu, instances):
 437   """Returns list of checked and expanded instance names.
 438
 439   @type lu: L{LogicalUnit}
 440   @param lu: the logical unit on whose behalf we execute
 441   @type instances: list
 442   @param instances: list of instance names or None for all instances
 443   @rtype: list
 444   @return: the list of instances, sorted
 445   @raise errors.OpPrereqError: if the instances parameter is wrong type
 446   @raise errors.OpPrereqError: if any of the passed instances is not found
 447
 448   """
 449   if not isinstance(instances, list):
 450     raise errors.OpPrereqError("Invalid argument type 'instances'")
 451
 452   if instances:
 453     wanted = []
 454
 455     for name in instances:
 456       instance = lu.cfg.ExpandInstanceName(name)
 457       if instance is None:
 458         raise errors.OpPrereqError("No such instance name '%s'" % name)
 459       wanted.append(instance)
 460
 461   else:
 462     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 463   return wanted
 464
 465
 466 def _CheckOutputFields(static, dynamic, selected):
 467   """Checks whether all selected fields are valid.
 468
 469   @type static: L{utils.FieldSet}
 470   @param static: static fields set
 471   @type dynamic: L{utils.FieldSet}
 472   @param dynamic: dynamic fields set
 473
 474   """
 475   f = utils.FieldSet()
 476   f.Extend(static)
 477   f.Extend(dynamic)
 478
 479   delta = f.NonMatching(selected)
 480   if delta:
 481     raise errors.OpPrereqError("Unknown output fields selected: %s"
 482                                % ",".join(delta))
 483
 484
 485 def _CheckBooleanOpField(op, name):
 486   """Validates boolean opcode parameters.
 487
 488   This will ensure that an opcode parameter is either a boolean value,
 489   or None (but that it always exists).
 490
 491   """
 492   val = getattr(op, name, None)
 493   if not (val is None or isinstance(val, bool)):
 494     raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
 495                                (name, str(val)))
 496   setattr(op, name, val)
 497
 498
 499 def _CheckNodeOnline(lu, node):
 500   """Ensure that a given node is online.
 501
 502   @param lu: the LU on behalf of which we make the check
 503   @param node: the node to check
 504   @raise errors.OpPrereqError: if the node is offline
 505
 506   """
 507   if lu.cfg.GetNodeInfo(node).offline:
 508     raise errors.OpPrereqError("Can't use offline node %s" % node)
 509
 510
 511 def _CheckNodeNotDrained(lu, node):
 512   """Ensure that a given node is not drained.
 513
 514   @param lu: the LU on behalf of which we make the check
 515   @param node: the node to check
 516   @raise errors.OpPrereqError: if the node is drained
 517
 518   """
 519   if lu.cfg.GetNodeInfo(node).drained:
 520     raise errors.OpPrereqError("Can't use drained node %s" % node)
 521
 522
 523 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 524                           memory, vcpus, nics, disk_template, disks,
 525                           bep, hvp, hypervisor_name):
 526   """Builds instance related env variables for hooks
 527
 528   This builds the hook environment from individual variables.
 529
 530   @type name: string
 531   @param name: the name of the instance
 532   @type primary_node: string
 533   @param primary_node: the name of the instance's primary node
 534   @type secondary_nodes: list
 535   @param secondary_nodes: list of secondary nodes as strings
 536   @type os_type: string
 537   @param os_type: the name of the instance's OS
 538   @type status: boolean
 539   @param status: the should_run status of the instance
 540   @type memory: string
 541   @param memory: the memory size of the instance
 542   @type vcpus: string
 543   @param vcpus: the count of VCPUs the instance has
 544   @type nics: list
 545   @param nics: list of tuples (ip, mac, mode, link) representing
 546       the NICs the instance has
 547   @type disk_template: string
 548   @param disk_template: the disk template of the instance
 549   @type disks: list
 550   @param disks: the list of (size, mode) pairs
 551   @type bep: dict
 552   @param bep: the backend parameters for the instance
 553   @type hvp: dict
 554   @param hvp: the hypervisor parameters for the instance
 555   @type hypervisor_name: string
 556   @param hypervisor_name: the hypervisor for the instance
 557   @rtype: dict
 558   @return: the hook environment for this instance
 559
 560   """
 561   if status:
 562     str_status = "up"
 563   else:
 564     str_status = "down"
 565   env = {
 566     "OP_TARGET": name,
 567     "INSTANCE_NAME": name,
 568     "INSTANCE_PRIMARY": primary_node,
 569     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 570     "INSTANCE_OS_TYPE": os_type,
 571     "INSTANCE_STATUS": str_status,
 572     "INSTANCE_MEMORY": memory,
 573     "INSTANCE_VCPUS": vcpus,
 574     "INSTANCE_DISK_TEMPLATE": disk_template,
 575     "INSTANCE_HYPERVISOR": hypervisor_name,
 576   }
 577
 578   if nics:
 579     nic_count = len(nics)
 580     for idx, (ip, mac, mode, link) in enumerate(nics):
 581       if ip is None:
 582         ip = ""
 583       env["INSTANCE_NIC%d_IP" % idx] = ip
 584       env["INSTANCE_NIC%d_MAC" % idx] = mac
 585       env["INSTANCE_NIC%d_MODE" % idx] = mode
 586       env["INSTANCE_NIC%d_LINK" % idx] = link
 587       if mode == constants.NIC_MODE_BRIDGED:
 588         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 589   else:
 590     nic_count = 0
 591
 592   env["INSTANCE_NIC_COUNT"] = nic_count
 593
 594   if disks:
 595     disk_count = len(disks)
 596     for idx, (size, mode) in enumerate(disks):
 597       env["INSTANCE_DISK%d_SIZE" % idx] = size
 598       env["INSTANCE_DISK%d_MODE" % idx] = mode
 599   else:
 600     disk_count = 0
 601
 602   env["INSTANCE_DISK_COUNT"] = disk_count
 603
 604   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 605     for key, value in source.items():
 606       env["INSTANCE_%s_%s" % (kind, key)] = value
 607
 608   return env
 609
 610
 611 def _NICListToTuple(lu, nics):
 612   """Build a list of nic information tuples.
 613
 614   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 615   value in LUQueryInstanceData.
 616
 617   @type lu:  L{LogicalUnit}
 618   @param lu: the logical unit on whose behalf we execute
 619   @type nics: list of L{objects.NIC}
 620   @param nics: list of nics to convert to hooks tuples
 621
 622   """
 623   hooks_nics = []
 624   c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
 625   for nic in nics:
 626     ip = nic.ip
 627     mac = nic.mac
 628     filled_params = objects.FillDict(c_nicparams, nic.nicparams)
 629     mode = filled_params[constants.NIC_MODE]
 630     link = filled_params[constants.NIC_LINK]
 631     hooks_nics.append((ip, mac, mode, link))
 632   return hooks_nics
 633
 634
 635 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 636   """Builds instance related env variables for hooks from an object.
 637
 638   @type lu: L{LogicalUnit}
 639   @param lu: the logical unit on whose behalf we execute
 640   @type instance: L{objects.Instance}
 641   @param instance: the instance for which we should build the
 642       environment
 643   @type override: dict
 644   @param override: dictionary with key/values that will override
 645       our values
 646   @rtype: dict
 647   @return: the hook environment dictionary
 648
 649   """
 650   cluster = lu.cfg.GetClusterInfo()
 651   bep = cluster.FillBE(instance)
 652   hvp = cluster.FillHV(instance)
 653   args = {
 654     'name': instance.name,
 655     'primary_node': instance.primary_node,
 656     'secondary_nodes': instance.secondary_nodes,
 657     'os_type': instance.os,
 658     'status': instance.admin_up,
 659     'memory': bep[constants.BE_MEMORY],
 660     'vcpus': bep[constants.BE_VCPUS],
 661     'nics': _NICListToTuple(lu, instance.nics),
 662     'disk_template': instance.disk_template,
 663     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 664     'bep': bep,
 665     'hvp': hvp,
 666     'hypervisor_name': instance.hypervisor,
 667   }
 668   if override:
 669     args.update(override)
 670   return _BuildInstanceHookEnv(**args)
 671
 672
 673 def _AdjustCandidatePool(lu):
 674   """Adjust the candidate pool after node operations.
 675
 676   """
 677   mod_list = lu.cfg.MaintainCandidatePool()
 678   if mod_list:
 679     lu.LogInfo("Promoted nodes to master candidate role: %s",
 680                ", ".join(node.name for node in mod_list))
 681     for name in mod_list:
 682       lu.context.ReaddNode(name)
 683   mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
 684   if mc_now > mc_max:
 685     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 686                (mc_now, mc_max))
 687
 688
 689 def _CheckNicsBridgesExist(lu, target_nics, target_node,
 690                                profile=constants.PP_DEFAULT):
 691   """Check that the brigdes needed by a list of nics exist.
 692
 693   """
 694   c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
 695   paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
 696                 for nic in target_nics]
 697   brlist = [params[constants.NIC_LINK] for params in paramslist
 698             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 699   if brlist:
 700     result = lu.rpc.call_bridges_exist(target_node, brlist)
 701     result.Raise("Error checking bridges on destination node '%s'" %
 702                  target_node, prereq=True)
 703
 704
 705 def _CheckInstanceBridgesExist(lu, instance, node=None):
 706   """Check that the brigdes needed by an instance exist.
 707
 708   """
 709   if node is None:
 710     node = instance.primary_node
 711   _CheckNicsBridgesExist(lu, instance.nics, node)
 712
 713
 714 def _GetNodeInstancesInner(cfg, fn):
 715   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
 716
 717
 718 def _GetNodeInstances(cfg, node_name):
 719   """Returns a list of all primary and secondary instances on a node.
 720
 721   """
 722
 723   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
 724
 725
 726 def _GetNodePrimaryInstances(cfg, node_name):
 727   """Returns primary instances on a node.
 728
 729   """
 730   return _GetNodeInstancesInner(cfg,
 731                                 lambda inst: node_name == inst.primary_node)
 732
 733
 734 def _GetNodeSecondaryInstances(cfg, node_name):
 735   """Returns secondary instances on a node.
 736
 737   """
 738   return _GetNodeInstancesInner(cfg,
 739                                 lambda inst: node_name in inst.secondary_nodes)
 740
 741
 742 def _GetStorageTypeArgs(cfg, storage_type):
 743   """Returns the arguments for a storage type.
 744
 745   """
 746   # Special case for file storage
 747   if storage_type == constants.ST_FILE:
 748     # storage.FileStorage wants a list of storage directories
 749     return [[cfg.GetFileStorageDir()]]
 750
 751   return []
 752
 753
 754 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
 755   faulty = []
 756
 757   for dev in instance.disks:
 758     cfg.SetDiskID(dev, node_name)
 759
 760   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
 761   result.Raise("Failed to get disk status from node %s" % node_name,
 762                prereq=prereq)
 763
 764   for idx, bdev_status in enumerate(result.payload):
 765     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
 766       faulty.append(idx)
 767
 768   return faulty
 769
 770
 771 class LUPostInitCluster(LogicalUnit):
 772   """Logical unit for running hooks after cluster initialization.
 773
 774   """
 775   HPATH = "cluster-init"
 776   HTYPE = constants.HTYPE_CLUSTER
 777   _OP_REQP = []
 778
 779   def BuildHooksEnv(self):
 780     """Build hooks env.
 781
 782     """
 783     env = {"OP_TARGET": self.cfg.GetClusterName()}
 784     mn = self.cfg.GetMasterNode()
 785     return env, [], [mn]
 786
 787   def CheckPrereq(self):
 788     """No prerequisites to check.
 789
 790     """
 791     return True
 792
 793   def Exec(self, feedback_fn):
 794     """Nothing to do.
 795
 796     """
 797     return True
 798
 799
 800 class LUDestroyCluster(LogicalUnit):
 801   """Logical unit for destroying the cluster.
 802
 803   """
 804   HPATH = "cluster-destroy"
 805   HTYPE = constants.HTYPE_CLUSTER
 806   _OP_REQP = []
 807
 808   def BuildHooksEnv(self):
 809     """Build hooks env.
 810
 811     """
 812     env = {"OP_TARGET": self.cfg.GetClusterName()}
 813     return env, [], []
 814
 815   def CheckPrereq(self):
 816     """Check prerequisites.
 817
 818     This checks whether the cluster is empty.
 819
 820     Any errors are signaled by raising errors.OpPrereqError.
 821
 822     """
 823     master = self.cfg.GetMasterNode()
 824
 825     nodelist = self.cfg.GetNodeList()
 826     if len(nodelist) != 1 or nodelist[0] != master:
 827       raise errors.OpPrereqError("There are still %d node(s) in"
 828                                  " this cluster." % (len(nodelist) - 1))
 829     instancelist = self.cfg.GetInstanceList()
 830     if instancelist:
 831       raise errors.OpPrereqError("There are still %d instance(s) in"
 832                                  " this cluster." % len(instancelist))
 833
 834   def Exec(self, feedback_fn):
 835     """Destroys the cluster.
 836
 837     """
 838     master = self.cfg.GetMasterNode()
 839
 840     # Run post hooks on master node before it's removed
 841     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
 842     try:
 843       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
 844     except:
 845       self.LogWarning("Errors occurred running hooks on %s" % master)
 846
 847     result = self.rpc.call_node_stop_master(master, False)
 848     result.Raise("Could not disable the master role")
 849     priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
 850     utils.CreateBackup(priv_key)
 851     utils.CreateBackup(pub_key)
 852     return master
 853
 854
 855 class LUVerifyCluster(LogicalUnit):
 856   """Verifies the cluster status.
 857
 858   """
 859   HPATH = "cluster-verify"
 860   HTYPE = constants.HTYPE_CLUSTER
 861   _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
 862   REQ_BGL = False
 863
 864   TCLUSTER = "cluster"
 865   TNODE = "node"
 866   TINSTANCE = "instance"
 867
 868   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
 869   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
 870   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
 871   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
 872   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
 873   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
 874   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
 875   ENODEDRBD = (TNODE, "ENODEDRBD")
 876   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
 877   ENODEHOOKS = (TNODE, "ENODEHOOKS")
 878   ENODEHV = (TNODE, "ENODEHV")
 879   ENODELVM = (TNODE, "ENODELVM")
 880   ENODEN1 = (TNODE, "ENODEN1")
 881   ENODENET = (TNODE, "ENODENET")
 882   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
 883   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
 884   ENODERPC = (TNODE, "ENODERPC")
 885   ENODESSH = (TNODE, "ENODESSH")
 886   ENODEVERSION = (TNODE, "ENODEVERSION")
 887
 888   ETYPE_FIELD = "code"
 889   ETYPE_ERROR = "ERROR"
 890   ETYPE_WARNING = "WARNING"
 891
 892   def ExpandNames(self):
 893     self.needed_locks = {
 894       locking.LEVEL_NODE: locking.ALL_SET,
 895       locking.LEVEL_INSTANCE: locking.ALL_SET,
 896     }
 897     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
 898
 899   def _Error(self, ecode, item, msg, *args, **kwargs):
 900     """Format an error message.
 901
 902     Based on the opcode's error_codes parameter, either format a
 903     parseable error code, or a simpler error string.
 904
 905     This must be called only from Exec and functions called from Exec.
 906
 907     """
 908     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
 909     itype, etxt = ecode
 910     # first complete the msg
 911     if args:
 912       msg = msg % args
 913     # then format the whole message
 914     if self.op.error_codes:
 915       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
 916     else:
 917       if item:
 918         item = " " + item
 919       else:
 920         item = ""
 921       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
 922     # and finally report it via the feedback_fn
 923     self._feedback_fn("  - %s" % msg)
 924
 925   def _ErrorIf(self, cond, *args, **kwargs):
 926     """Log an error message if the passed condition is True.
 927
 928     """
 929     cond = bool(cond) or self.op.debug_simulate_errors
 930     if cond:
 931       self._Error(*args, **kwargs)
 932     # do not mark the operation as failed for WARN cases only
 933     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
 934       self.bad = self.bad or cond
 935
 936   def _VerifyNode(self, nodeinfo, file_list, local_cksum,
 937                   node_result, master_files, drbd_map, vg_name):
 938     """Run multiple tests against a node.
 939
 940     Test list:
 941
 942       - compares ganeti version
 943       - checks vg existence and size > 20G
 944       - checks config file checksum
 945       - checks ssh to other nodes
 946
 947     @type nodeinfo: L{objects.Node}
 948     @param nodeinfo: the node to check
 949     @param file_list: required list of files
 950     @param local_cksum: dictionary of local files and their checksums
 951     @param node_result: the results from the node
 952     @param master_files: list of files that only masters should have
 953     @param drbd_map: the useddrbd minors for this node, in
 954         form of minor: (instance, must_exist) which correspond to instances
 955         and their running status
 956     @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
 957
 958     """
 959     node = nodeinfo.name
 960     _ErrorIf = self._ErrorIf
 961
 962     # main result, node_result should be a non-empty dict
 963     test = not node_result or not isinstance(node_result, dict)
 964     _ErrorIf(test, self.ENODERPC, node,
 965                   "unable to verify node: no data returned")
 966     if test:
 967       return
 968
 969     # compares ganeti version
 970     local_version = constants.PROTOCOL_VERSION
 971     remote_version = node_result.get('version', None)
 972     test = not (remote_version and
 973                 isinstance(remote_version, (list, tuple)) and
 974                 len(remote_version) == 2)
 975     _ErrorIf(test, self.ENODERPC, node,
 976              "connection to node returned invalid data")
 977     if test:
 978       return
 979
 980     test = local_version != remote_version[0]
 981     _ErrorIf(test, self.ENODEVERSION, node,
 982              "incompatible protocol versions: master %s,"
 983              " node %s", local_version, remote_version[0])
 984     if test:
 985       return
 986
 987     # node seems compatible, we can actually try to look into its results
 988
 989     # full package version
 990     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
 991                   self.ENODEVERSION, node,
 992                   "software version mismatch: master %s, node %s",
 993                   constants.RELEASE_VERSION, remote_version[1],
 994                   code=self.ETYPE_WARNING)
 995
 996     # checks vg existence and size > 20G
 997     if vg_name is not None:
 998       vglist = node_result.get(constants.NV_VGLIST, None)
 999       test = not vglist
1000       _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1001       if not test:
1002         vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1003                                               constants.MIN_VG_SIZE)
1004         _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1005
1006     # checks config file checksum
1007
1008     remote_cksum = node_result.get(constants.NV_FILELIST, None)
1009     test = not isinstance(remote_cksum, dict)
1010     _ErrorIf(test, self.ENODEFILECHECK, node,
1011              "node hasn't returned file checksum data")
1012     if not test:
1013       for file_name in file_list:
1014         node_is_mc = nodeinfo.master_candidate
1015         must_have = (file_name not in master_files) or node_is_mc
1016         # missing
1017         test1 = file_name not in remote_cksum
1018         # invalid checksum
1019         test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1020         # existing and good
1021         test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1022         _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1023                  "file '%s' missing", file_name)
1024         _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1025                  "file '%s' has wrong checksum", file_name)
1026         # not candidate and this is not a must-have file
1027         _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1028                  "file '%s' should not exist on non master"
1029                  " candidates (and the file is outdated)", file_name)
1030         # all good, except non-master/non-must have combination
1031         _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1032                  "file '%s' should not exist"
1033                  " on non master candidates", file_name)
1034
1035     # checks ssh to any
1036
1037     test = constants.NV_NODELIST not in node_result
1038     _ErrorIf(test, self.ENODESSH, node,
1039              "node hasn't returned node ssh connectivity data")
1040     if not test:
1041       if node_result[constants.NV_NODELIST]:
1042         for a_node, a_msg in node_result[constants.NV_NODELIST].items():
1043           _ErrorIf(True, self.ENODESSH, node,
1044                    "ssh communication with node '%s': %s", a_node, a_msg)
1045
1046     test = constants.NV_NODENETTEST not in node_result
1047     _ErrorIf(test, self.ENODENET, node,
1048              "node hasn't returned node tcp connectivity data")
1049     if not test:
1050       if node_result[constants.NV_NODENETTEST]:
1051         nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
1052         for anode in nlist:
1053           _ErrorIf(True, self.ENODENET, node,
1054                    "tcp communication with node '%s': %s",
1055                    anode, node_result[constants.NV_NODENETTEST][anode])
1056
1057     hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
1058     if isinstance(hyp_result, dict):
1059       for hv_name, hv_result in hyp_result.iteritems():
1060         test = hv_result is not None
1061         _ErrorIf(test, self.ENODEHV, node,
1062                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1063
1064     # check used drbd list
1065     if vg_name is not None:
1066       used_minors = node_result.get(constants.NV_DRBDLIST, [])
1067       test = not isinstance(used_minors, (tuple, list))
1068       _ErrorIf(test, self.ENODEDRBD, node,
1069                "cannot parse drbd status file: %s", str(used_minors))
1070       if not test:
1071         for minor, (iname, must_exist) in drbd_map.items():
1072           test = minor not in used_minors and must_exist
1073           _ErrorIf(test, self.ENODEDRBD, node,
1074                    "drbd minor %d of instance %s is not active",
1075                    minor, iname)
1076         for minor in used_minors:
1077           test = minor not in drbd_map
1078           _ErrorIf(test, self.ENODEDRBD, node,
1079                    "unallocated drbd minor %d is in use", minor)
1080
1081   def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
1082                       node_instance, n_offline):
1083     """Verify an instance.
1084
1085     This function checks to see if the required block devices are
1086     available on the instance's node.
1087
1088     """
1089     _ErrorIf = self._ErrorIf
1090     node_current = instanceconfig.primary_node
1091
1092     node_vol_should = {}
1093     instanceconfig.MapLVsByNode(node_vol_should)
1094
1095     for node in node_vol_should:
1096       if node in n_offline:
1097         # ignore missing volumes on offline nodes
1098         continue
1099       for volume in node_vol_should[node]:
1100         test = node not in node_vol_is or volume not in node_vol_is[node]
1101         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1102                  "volume %s missing on node %s", volume, node)
1103
1104     if instanceconfig.admin_up:
1105       test = ((node_current not in node_instance or
1106                not instance in node_instance[node_current]) and
1107               node_current not in n_offline)
1108       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1109                "instance not running on its primary node %s",
1110                node_current)
1111
1112     for node in node_instance:
1113       if (not node == node_current):
1114         test = instance in node_instance[node]
1115         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1116                  "instance should not run on node %s", node)
1117
1118   def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
1119     """Verify if there are any unknown volumes in the cluster.
1120
1121     The .os, .swap and backup volumes are ignored. All other volumes are
1122     reported as unknown.
1123
1124     """
1125     for node in node_vol_is:
1126       for volume in node_vol_is[node]:
1127         test = (node not in node_vol_should or
1128                 volume not in node_vol_should[node])
1129         self._ErrorIf(test, self.ENODEORPHANLV, node,
1130                       "volume %s is unknown", volume)
1131
1132   def _VerifyOrphanInstances(self, instancelist, node_instance):
1133     """Verify the list of running instances.
1134
1135     This checks what instances are running but unknown to the cluster.
1136
1137     """
1138     for node in node_instance:
1139       for o_inst in node_instance[node]:
1140         test = o_inst not in instancelist
1141         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1142                       "instance %s on node %s should not exist", o_inst, node)
1143
1144   def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
1145     """Verify N+1 Memory Resilience.
1146
1147     Check that if one single node dies we can still start all the instances it
1148     was primary for.
1149
1150     """
1151     for node, nodeinfo in node_info.iteritems():
1152       # This code checks that every node which is now listed as secondary has
1153       # enough memory to host all instances it is supposed to should a single
1154       # other node in the cluster fail.
1155       # FIXME: not ready for failover to an arbitrary node
1156       # FIXME: does not support file-backed instances
1157       # WARNING: we currently take into account down instances as well as up
1158       # ones, considering that even if they're down someone might want to start
1159       # them even in the event of a node failure.
1160       for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1161         needed_mem = 0
1162         for instance in instances:
1163           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1164           if bep[constants.BE_AUTO_BALANCE]:
1165             needed_mem += bep[constants.BE_MEMORY]
1166         test = nodeinfo['mfree'] < needed_mem
1167         self._ErrorIf(test, self.ENODEN1, node,
1168                       "not enough memory on to accommodate"
1169                       " failovers should peer node %s fail", prinode)
1170
1171   def CheckPrereq(self):
1172     """Check prerequisites.
1173
1174     Transform the list of checks we're going to skip into a set and check that
1175     all its members are valid.
1176
1177     """
1178     self.skip_set = frozenset(self.op.skip_checks)
1179     if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1180       raise errors.OpPrereqError("Invalid checks to be skipped specified")
1181
1182   def BuildHooksEnv(self):
1183     """Build hooks env.
1184
1185     Cluster-Verify hooks just ran in the post phase and their failure makes
1186     the output be logged in the verify output and the verification to fail.
1187
1188     """
1189     all_nodes = self.cfg.GetNodeList()
1190     env = {
1191       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1192       }
1193     for node in self.cfg.GetAllNodesInfo().values():
1194       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1195
1196     return env, [], all_nodes
1197
1198   def Exec(self, feedback_fn):
1199     """Verify integrity of cluster, performing various test on nodes.
1200
1201     """
1202     self.bad = False
1203     _ErrorIf = self._ErrorIf
1204     verbose = self.op.verbose
1205     self._feedback_fn = feedback_fn
1206     feedback_fn("* Verifying global settings")
1207     for msg in self.cfg.VerifyConfig():
1208       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1209
1210     vg_name = self.cfg.GetVGName()
1211     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1212     nodelist = utils.NiceSort(self.cfg.GetNodeList())
1213     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1214     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1215     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1216                         for iname in instancelist)
1217     i_non_redundant = [] # Non redundant instances
1218     i_non_a_balanced = [] # Non auto-balanced instances
1219     n_offline = [] # List of offline nodes
1220     n_drained = [] # List of nodes being drained
1221     node_volume = {}
1222     node_instance = {}
1223     node_info = {}
1224     instance_cfg = {}
1225
1226     # FIXME: verify OS list
1227     # do local checksums
1228     master_files = [constants.CLUSTER_CONF_FILE]
1229
1230     file_names = ssconf.SimpleStore().GetFileList()
1231     file_names.append(constants.SSL_CERT_FILE)
1232     file_names.append(constants.RAPI_CERT_FILE)
1233     file_names.extend(master_files)
1234
1235     local_checksums = utils.FingerprintFiles(file_names)
1236
1237     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1238     node_verify_param = {
1239       constants.NV_FILELIST: file_names,
1240       constants.NV_NODELIST: [node.name for node in nodeinfo
1241                               if not node.offline],
1242       constants.NV_HYPERVISOR: hypervisors,
1243       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1244                                   node.secondary_ip) for node in nodeinfo
1245                                  if not node.offline],
1246       constants.NV_INSTANCELIST: hypervisors,
1247       constants.NV_VERSION: None,
1248       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1249       }
1250     if vg_name is not None:
1251       node_verify_param[constants.NV_VGLIST] = None
1252       node_verify_param[constants.NV_LVLIST] = vg_name
1253       node_verify_param[constants.NV_DRBDLIST] = None
1254     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1255                                            self.cfg.GetClusterName())
1256
1257     cluster = self.cfg.GetClusterInfo()
1258     master_node = self.cfg.GetMasterNode()
1259     all_drbd_map = self.cfg.ComputeDRBDMap()
1260
1261     feedback_fn("* Verifying node status")
1262     for node_i in nodeinfo:
1263       node = node_i.name
1264
1265       if node_i.offline:
1266         if verbose:
1267           feedback_fn("* Skipping offline node %s" % (node,))
1268         n_offline.append(node)
1269         continue
1270
1271       if node == master_node:
1272         ntype = "master"
1273       elif node_i.master_candidate:
1274         ntype = "master candidate"
1275       elif node_i.drained:
1276         ntype = "drained"
1277         n_drained.append(node)
1278       else:
1279         ntype = "regular"
1280       if verbose:
1281         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1282
1283       msg = all_nvinfo[node].fail_msg
1284       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1285       if msg:
1286         continue
1287
1288       nresult = all_nvinfo[node].payload
1289       node_drbd = {}
1290       for minor, instance in all_drbd_map[node].items():
1291         test = instance not in instanceinfo
1292         _ErrorIf(test, self.ECLUSTERCFG, None,
1293                  "ghost instance '%s' in temporary DRBD map", instance)
1294           # ghost instance should not be running, but otherwise we
1295           # don't give double warnings (both ghost instance and
1296           # unallocated minor in use)
1297         if test:
1298           node_drbd[minor] = (instance, False)
1299         else:
1300           instance = instanceinfo[instance]
1301           node_drbd[minor] = (instance.name, instance.admin_up)
1302       self._VerifyNode(node_i, file_names, local_checksums,
1303                        nresult, master_files, node_drbd, vg_name)
1304
1305       lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1306       if vg_name is None:
1307         node_volume[node] = {}
1308       elif isinstance(lvdata, basestring):
1309         _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1310                  utils.SafeEncode(lvdata))
1311         node_volume[node] = {}
1312       elif not isinstance(lvdata, dict):
1313         _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1314         continue
1315       else:
1316         node_volume[node] = lvdata
1317
1318       # node_instance
1319       idata = nresult.get(constants.NV_INSTANCELIST, None)
1320       test = not isinstance(idata, list)
1321       _ErrorIf(test, self.ENODEHV, node,
1322                "rpc call to node failed (instancelist)")
1323       if test:
1324         continue
1325
1326       node_instance[node] = idata
1327
1328       # node_info
1329       nodeinfo = nresult.get(constants.NV_HVINFO, None)
1330       test = not isinstance(nodeinfo, dict)
1331       _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1332       if test:
1333         continue
1334
1335       try:
1336         node_info[node] = {
1337           "mfree": int(nodeinfo['memory_free']),
1338           "pinst": [],
1339           "sinst": [],
1340           # dictionary holding all instances this node is secondary for,
1341           # grouped by their primary node. Each key is a cluster node, and each
1342           # value is a list of instances which have the key as primary and the
1343           # current node as secondary.  this is handy to calculate N+1 memory
1344           # availability if you can only failover from a primary to its
1345           # secondary.
1346           "sinst-by-pnode": {},
1347         }
1348         # FIXME: devise a free space model for file based instances as well
1349         if vg_name is not None:
1350           test = (constants.NV_VGLIST not in nresult or
1351                   vg_name not in nresult[constants.NV_VGLIST])
1352           _ErrorIf(test, self.ENODELVM, node,
1353                    "node didn't return data for the volume group '%s'"
1354                    " - it is either missing or broken", vg_name)
1355           if test:
1356             continue
1357           node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1358       except (ValueError, KeyError):
1359         _ErrorIf(True, self.ENODERPC, node,
1360                  "node returned invalid nodeinfo, check lvm/hypervisor")
1361         continue
1362
1363     node_vol_should = {}
1364
1365     feedback_fn("* Verifying instance status")
1366     for instance in instancelist:
1367       if verbose:
1368         feedback_fn("* Verifying instance %s" % instance)
1369       inst_config = instanceinfo[instance]
1370       self._VerifyInstance(instance, inst_config, node_volume,
1371                            node_instance, n_offline)
1372       inst_nodes_offline = []
1373
1374       inst_config.MapLVsByNode(node_vol_should)
1375
1376       instance_cfg[instance] = inst_config
1377
1378       pnode = inst_config.primary_node
1379       _ErrorIf(pnode not in node_info and pnode not in n_offline,
1380                self.ENODERPC, pnode, "instance %s, connection to"
1381                " primary node failed", instance)
1382       if pnode in node_info:
1383         node_info[pnode]['pinst'].append(instance)
1384
1385       if pnode in n_offline:
1386         inst_nodes_offline.append(pnode)
1387
1388       # If the instance is non-redundant we cannot survive losing its primary
1389       # node, so we are not N+1 compliant. On the other hand we have no disk
1390       # templates with more than one secondary so that situation is not well
1391       # supported either.
1392       # FIXME: does not support file-backed instances
1393       if len(inst_config.secondary_nodes) == 0:
1394         i_non_redundant.append(instance)
1395       _ErrorIf(len(inst_config.secondary_nodes) > 1,
1396                self.EINSTANCELAYOUT, instance,
1397                "instance has multiple secondary nodes", code="WARNING")
1398
1399       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1400         i_non_a_balanced.append(instance)
1401
1402       for snode in inst_config.secondary_nodes:
1403         _ErrorIf(snode not in node_info and snode not in n_offline,
1404                  self.ENODERPC, snode,
1405                  "instance %s, connection to secondary node"
1406                  "failed", instance)
1407
1408         if snode in node_info:
1409           node_info[snode]['sinst'].append(instance)
1410           if pnode not in node_info[snode]['sinst-by-pnode']:
1411             node_info[snode]['sinst-by-pnode'][pnode] = []
1412           node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1413
1414         if snode in n_offline:
1415           inst_nodes_offline.append(snode)
1416
1417       # warn that the instance lives on offline nodes
1418       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1419                "instance lives on offline node(s) %s",
1420                ", ".join(inst_nodes_offline))
1421
1422     feedback_fn("* Verifying orphan volumes")
1423     self._VerifyOrphanVolumes(node_vol_should, node_volume)
1424
1425     feedback_fn("* Verifying remaining instances")
1426     self._VerifyOrphanInstances(instancelist, node_instance)
1427
1428     if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1429       feedback_fn("* Verifying N+1 Memory redundancy")
1430       self._VerifyNPlusOneMemory(node_info, instance_cfg)
1431
1432     feedback_fn("* Other Notes")
1433     if i_non_redundant:
1434       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1435                   % len(i_non_redundant))
1436
1437     if i_non_a_balanced:
1438       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1439                   % len(i_non_a_balanced))
1440
1441     if n_offline:
1442       feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1443
1444     if n_drained:
1445       feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1446
1447     return not self.bad
1448
1449   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1450     """Analyze the post-hooks' result
1451
1452     This method analyses the hook result, handles it, and sends some
1453     nicely-formatted feedback back to the user.
1454
1455     @param phase: one of L{constants.HOOKS_PHASE_POST} or
1456         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1457     @param hooks_results: the results of the multi-node hooks rpc call
1458     @param feedback_fn: function used send feedback back to the caller
1459     @param lu_result: previous Exec result
1460     @return: the new Exec result, based on the previous result
1461         and hook results
1462
1463     """
1464     # We only really run POST phase hooks, and are only interested in
1465     # their results
1466     if phase == constants.HOOKS_PHASE_POST:
1467       # Used to change hooks' output to proper indentation
1468       indent_re = re.compile('^', re.M)
1469       feedback_fn("* Hooks Results")
1470       assert hooks_results, "invalid result from hooks"
1471
1472       for node_name in hooks_results:
1473         show_node_header = True
1474         res = hooks_results[node_name]
1475         msg = res.fail_msg
1476         test = msg and not res.offline
1477         self._ErrorIf(test, self.ENODEHOOKS, node_name,
1478                       "Communication failure in hooks execution: %s", msg)
1479         if test:
1480           # override manually lu_result here as _ErrorIf only
1481           # overrides self.bad
1482           lu_result = 1
1483           continue
1484         for script, hkr, output in res.payload:
1485           test = hkr == constants.HKR_FAIL
1486           self._ErrorIf(test, self.ENODEHOOKS, node_name,
1487                         "Script %s failed, output:", script)
1488           if test:
1489             output = indent_re.sub('      ', output)
1490             feedback_fn("%s" % output)
1491             lu_result = 1
1492
1493       return lu_result
1494
1495
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks of all nodes and all instances.

    The share flag is set to 1 for every locking level.

    """
    self.share_locks = dict((level, 1) for level in locking.LEVELS)
    self.needed_locks = {
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      locking.LEVEL_NODE: locking.ALL_SET,
    }

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances which are marked as up and whose disk template is in
    constants.DTS_NET_MIRROR are looked at.

    @param feedback_fn: function used to send feedback back to the caller
        (not used by this method)
    @rtype: tuple of three items
    @return: a tuple of (dict of node name to error message for the nodes
        which could not be queried, list of names of the instances which
        have a volume that is not online, dict of instance name to list
        of (node, volume) pairs for the volumes not found on their node)

    """
    res_nodes = {}
    res_instances = []
    res_missing = {}
    result = (res_nodes, res_instances, res_missing)

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(iname)
                 for iname in self.cfg.GetInstanceList()]

    # maps (node, volume) to the instance owning the volume
    nv_dict = {}
    for inst in instances:
      if not inst.admin_up:
        continue
      if inst.disk_template not in constants.DTS_NET_MIRROR:
        continue
      inst_lvs = {}
      inst.MapLVsByNode(inst_lvs)
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    # nothing to check, so the nodes are not queried at all
    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      for lv_name, lv_attrs in node_res.payload.items():
        # only the last of the three values is of interest here
        _, _, lv_online = lv_attrs
        # every volume found is removed from the map, whatever its state
        owner = nv_dict.pop((node, lv_name), None)
        if owner is None or lv_online:
          continue
        if owner.name not in res_instances:
          res_instances.append(owner.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      res_missing.setdefault(inst.name, []).append(key)

    return result
1577
1578
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance names and declare the needed locks.

    If instances were given, only they are locked and the node locks are
    computed later, in L{DeclareLocks}; otherwise all instances and all
    nodes are locked. The share flag is set to 1 for every locking level.

    @raise errors.OpPrereqError: if the instances parameter is not a list
        or if one of the given instances is not known

    """
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name)
        self.wanted_names.append(full_name)
      # the whole lock declaration is built in one step
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    """Compute the node locks when specific instances were requested.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This computes the list of instance objects to work on; if no
    instances were requested, all the instances whose locks were
    acquired are used.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    The disks are grouped by the primary node of their instance and each
    such node is asked once for the sizes of its disks. A recorded size
    which differs from the reported one is corrected in the
    configuration.

    @param feedback_fn: function used to send feedback back to the caller
        (not used by this method)
    @rtype: list
    @return: list of (instance name, disk index, new size) tuples, one
        for each corrected disk

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      # NOTE(review): other RPC results in this file are read through
      # .payload, while .data is used here - confirm this is intended
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # the reported value is divided by 2**20 before being compared
        # with the recorded size
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance)
          changed.append((instance.name, idx, size))
    return changed
1670
1671
class LURenameCluster(LogicalUnit):
  """Logical unit changing the name (and possibly the IP) of the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build the hooks environment.

    @return: the environment dict, followed by two node lists which
        both contain only the master node

    """
    hook_env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    master_node = self.cfg.GetMasterNode()
    return hook_env, [master_node], [master_node]

  def CheckPrereq(self):
    """Validate the requested cluster name.

    The name is resolved through C{utils.HostInfo}. The request is
    refused if neither the name nor the IP would change, or if the IP
    changes and the new address already answers on the node daemon port.
    On success the resolved name replaces C{self.op.name} and the
    resolved IP is kept in C{self.ip}.

    @raise errors.OpPrereqError: if one of the checks above fails

    """
    resolved = utils.HostInfo(self.op.name)

    target_name = resolved.name
    target_ip = resolved.ip
    self.ip = target_ip

    current_name = self.cfg.GetClusterName()
    current_ip = self.cfg.GetMasterIP()
    ip_changed = target_ip != current_ip

    if target_name == current_name and not ip_changed:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed")
    if ip_changed and utils.TcpPing(target_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 target_ip)

    self.op.name = target_name

  def Exec(self, feedback_fn):
    """Perform the cluster rename.

    The master role is stopped, the cluster object is updated with the
    new name and IP, the known hosts file is rewritten and uploaded to
    the other nodes, and finally the master role is started again (this
    last step is attempted even if the update failed).

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_node = self.cfg.GetMasterNode()
    stop_res = self.rpc.call_node_stop_master(master_node, False)
    stop_res.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      other_nodes = self.cfg.GetNodeList()
      if master_node in other_nodes:
        other_nodes.remove(master_node)
      upload_res = self.rpc.call_upload_file(other_nodes,
                                             constants.SSH_KNOWN_HOSTS_FILE)
      for node_name, node_res in upload_res.iteritems():
        err = node_res.fail_msg
        if not err:
          continue
        # a failed copy is only reported, it doesn't abort the rename
        self.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                             (constants.SSH_KNOWN_HOSTS_FILE, node_name, err))

    finally:
      start_res = self.rpc.call_node_start_master(master_node, False, False)
      start_err = start_res.fail_msg
      if start_err:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", start_err)
1752
1753
def _RecursiveCheckIfLVMBased(disk):
  """Tell whether a disk, or any disk below it, is of the LD_LV type.

  The children (if any) are examined recursively before the disk itself.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: True if the disk or one of its descendants has the
      C{constants.LD_LV} dev_type, False otherwise

  """
  for child in disk.children or []:
    if _RecursiveCheckIfLVMBased(child):
      return True
  return disk.dev_type == constants.LD_LV
1768
1769
class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  The parameters handled are the volume group name, the hypervisor
  parameters, the list of enabled hypervisors, the default backend and
  NIC parameters and the master candidate pool size.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []
  REQ_BGL = False

  def CheckArguments(self):
    """Normalize and validate the candidate pool size.

    A missing C{candidate_pool_size} attribute is set to None; a given
    value is converted to an integer and must be at least one.

    @raise errors.OpPrereqError: if the value cannot be converted to an
        integer or is lower than one

    """
    if not hasattr(self.op, "candidate_pool_size"):
      self.op.candidate_pool_size = None
    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err))
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed")

  def ExpandNames(self):
    """Request all node locks, in shared mode.

    """
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: the environment dict, followed by two node lists which
        both contain only the master node

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    The computed values (C{self.cluster}, C{self.new_hvparams},
    C{self.hv_list} and, when the respective parameters were given,
    C{self.new_beparams} and C{self.new_nicparams}) are stored for use
    in L{Exec}.

    @raise errors.OpPrereqError: if LVM storage is being disabled while
        lvm-based instances exist, if the volume group check fails on a
        node, or if the hypervisor parameters/list are invalid

    """
    # a vg_name which is not None but empty means "disable LVM storage";
    # this is refused as long as any instance disk tree contains an LV
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist")

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        # a non-empty status is the error description for this node
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus))

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      # the given values override the current cluster defaults
      self.new_beparams = objects.FillDict(
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = objects.FillDict(
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)

    # hypervisor list/parameters
    # start from the current cluster hvparams (presumably a copy made by
    # FillDict - TODO confirm) and merge the per-hypervisor changes in
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      if not self.hv_list:
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                                   " least one member")
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
      if invalid_hvs:
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   " entries: %s" %
                                   utils.CommaJoin(invalid_hvs))
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      # NOTE: only hypervisors having an entry in new_hvparams are looked at
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    Only the parameters which were actually given in the opcode are
    applied; the cluster object is then saved via the configuration.

    @param feedback_fn: function used to report that the LVM
        configuration was already in the desired state

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      # an empty name is stored as None
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.enabled_hypervisors is not None:
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self)

    self.cfg.Update(self.cluster)
1924
1925
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Upload the ancillary cluster files to all nodes but the master.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be present on all nodes: a fixed set
  of cluster files plus whatever the enabled hypervisors declare as their
  ancillary files. Files which don't exist locally are skipped, and upload
  failures are only logged as warnings.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  targets = lu.cfg.GetNodeList()
  if additional_nodes is not None:
    targets.extend(additional_nodes)
  try:
    # the master already has the files
    targets.remove(master_info.name)
  except ValueError:
    pass

  # 2. Gather files to distribute
  files = set([constants.ETC_HOSTS,
               constants.SSH_KNOWN_HOSTS_FILE,
               constants.RAPI_CERT_FILE,
               constants.RAPI_USERS_FILE,
               constants.HMAC_CLUSTER_KEY,
              ])

  for hv_name in lu.cfg.GetClusterInfo().enabled_hypervisors:
    files.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # 3. Perform the files upload
  for path in files:
    if not os.path.exists(path):
      continue
    upload_res = lu.rpc.call_upload_file(targets, path)
    for node_name, node_res in upload_res.items():
      err = node_res.fail_msg
      if err:
        lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                           (path, node_name, err))
1967
1968
class LURedistributeConfig(NoHooksLU):
  """Logical unit forcing a redistribution of the cluster configuration.

  It re-saves the cluster object through the configuration and then
  pushes the ancillary files via L{_RedistributeAncillaryFiles}.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node locks, in shared mode.

    """
    node_locks = {}
    node_locks[locking.LEVEL_NODE] = locking.ALL_SET
    self.needed_locks = node_locks
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites (there are none for this LU).

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration and the ancillary files.

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster)
    _RedistributeAncillaryFiles(self)
1995
1996
1997 def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1998   """Sleep and poll for an instance's disk to sync.
1999
2000   """
2001   if not instance.disks:
2002     return True
2003
2004   if not oneshot:
2005     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2006
2007   node = instance.primary_node
2008
2009   for dev in instance.disks:
2010     lu.cfg.SetDiskID(dev, node)
2011
2012   retries = 0
2013   degr_retries = 10 # in seconds, as we sleep 1 second each time
2014   while True:
2015     max_time = 0
2016     done = True
2017     cumul_degraded = False
2018     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2019     msg = rstats.fail_msg
2020     if msg:
2021       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2022       retries += 1
2023       if retries >= 10:
2024         raise errors.RemoteError("Can't contact node %s for mirror data,"
2025                                  " aborting." % node)
2026       time.sleep(6)
2027       continue
2028     rstats = rstats.payload
2029     retries = 0
2030     for i, mstat in enumerate(rstats):
2031       if mstat is None:
2032         lu.LogWarning("Can't compute data for node %s/%s",
2033                            node, instance.disks[i].iv_name)
2034         continue
2035
2036       cumul_degraded = (cumul_degraded or
2037                         (mstat.is_degraded and mstat.sync_percent is None))
2038       if mstat.sync_percent is not None:
2039         done = False
2040         if mstat.estimated_time is not None:
2041           rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2042           max_time = mstat.estimated_time
2043         else:
2044           rem_time = "no time estimate"
2045         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2046                         (instance.disks[i].iv_name, mstat.sync_percent,
2047                          rem_time))
2048
2049     # if we're done but degraded, let's do a few small retries, to
2050     # make sure we see a stable and not transient situation; therefore
2051     # we force restart of the loop
2052     if (done or oneshot) and cumul_degraded and degr_retries > 0:
2053       logging.info("Degraded disks found, %d retries left", degr_retries)
2054       degr_retries -= 1
2055       time.sleep(1)
2056       continue
2057
2058     if done or oneshot:
2059       break
2060
2061     time.sleep(min(60, max_time))
2062
2063   if done:
2064     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2065   return not cumul_degraded
2066
2067
2068 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2069   """Check that mirrors are not degraded.
2070
2071   The ldisk parameter, if True, will change the test from the
2072   is_degraded attribute (which represents overall non-ok status for
2073   the device(s)) to the ldisk (representing the local storage status).
2074
2075   """
2076   lu.cfg.SetDiskID(dev, node)
2077
2078   result = True
2079
2080   if on_primary or dev.AssembleOnSecondary():
2081     rstats = lu.rpc.call_blockdev_find(node, dev)
2082     msg = rstats.fail_msg
2083     if msg:
2084       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2085       result = False
2086     elif not rstats.payload:
2087       lu.LogWarning("Can't find disk on node %s", node)
2088       result = False
2089     else:
2090       if ldisk:
2091         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2092       else:
2093         result = result and not rstats.payload.is_degraded
2094
2095   if dev.children:
2096     for child in dev.children:
2097       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2098
2099   return result
2100
2101
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  Only whole-cluster queries are supported; the result has one row per
  OS name found on the online nodes.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")

  def ExpandNames(self):
    """Validate the query and declare that no locks are needed.

    @raise errors.OpPrereqError: if specific OS names were requested

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported")

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites (there are none for this LU).

    """

  @staticmethod
  def _DiagnoseByOS(node_list, rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param node_list: a list with the names of all nodes (currently not
        used by the computation)
    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and tuples of (path, status, diagnose) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "")]}
          }

    """
    # only the nodes whose RPC call didn't fail get an entry for each
    # OS, so that nodes with a non-responding node daemon don't make
    # all OSes invalid
    responsive = [nname for nname, nres in rlist.items()
                  if not nres.fail_msg]
    by_os = {}
    for node_name, node_res in rlist.items():
      if node_res.fail_msg or not node_res.payload:
        continue
      for os_name, os_path, os_status, os_diag in node_res.payload:
        per_node = by_os.get(os_name)
        if per_node is None:
          # first time we see this OS: start with an empty list for
          # each responsive node
          per_node = dict([(nname, []) for nname in responsive])
          by_os[os_name] = per_node
        per_node[node_name].append((os_path, os_status, os_diag))
    return by_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    @rtype: list
    @return: one row per OS, each row holding the values of the
        requested output fields, in the requested order

    """
    online_nodes = list(self.cfg.GetOnlineNodeList())
    diag_data = self.rpc.call_os_diagnose(online_nodes)
    rows = []
    for os_name, per_node in self._DiagnoseByOS(online_nodes,
                                                diag_data).items():
      row = []
      for field in self.op.output_fields:
        if field == "name":
          row.append(os_name)
        elif field == "valid":
          # valid only if the first entry on every node has a true status
          row.append(utils.all([osl and osl[0][1]
                                for osl in per_node.values()]))
        elif field == "node_status":
          # this is just a copy of the dict
          row.append(dict(per_node.items()))
        else:
          raise errors.ParameterError(field)
      rows.append(row)

    return rows
2193
2194
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    @return: the environment dict, followed by two node lists which
        both hold all configured nodes except the one being removed

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    if self.op.node_name in all_nodes:
      all_nodes.remove(self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError. On success
    the expanded node name is stored back in the opcode and the node
    object is kept in C{self.node}.

    """
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
    if node is None:
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.")

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    The node is removed via the context first; afterwards the post
    hooks are run on it, it is told to leave the cluster and the master
    candidate pool is adjusted. Failures in the hooks or on the remote
    node are only reported as warnings.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it is told to leave the cluster
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # deliberately catch everything: the removal has already started
      # and must go on even if the hooks fail in unexpected ways
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self)
2274
2275
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  Static fields are answered from the configuration; dynamic fields
  need live data which is gathered from the nodes via RPC.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(
    "name", "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "serial_no", "ctime", "mtime",
    "master_candidate",
    "master",
    "offline",
    "drained",
    "role",
    )

  # pairs of (output field, key in the node info payload) for the live
  # values which are converted to integers
  _LIVE_INT_FIELDS = (
    ("mtotal", 'memory_total'),
    ("mnode", 'memory_dom0'),
    ("mfree", 'memory_free'),
    ("dtotal", 'vg_size'),
    ("dfree", 'vg_free'),
    ("ctotal", 'cpu_total'),
    ("cnodes", 'cpu_nodes'),
    ("csockets", 'cpu_sockets'),
    )

  def ExpandNames(self):
    """Validate the output fields and compute the needed locks.

    Node locks (in shared mode) are only requested if non-static fields
    were selected and the opcode asks for locking.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(self, self.op.names)

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  @staticmethod
  def _NodeRole(node, master_node):
    """Compute the one-letter role of a node.

    @param node: the node object
    @param master_node: the name of the master node
    @rtype: string
    @return: "M" for the master, otherwise "C" for a master candidate,
        "D" for a drained node, "O" for an offline node and "R" for
        any other node

    """
    if node.name == master_node:
      return "M"
    if node.master_candidate:
      return "C"
    if node.drained:
      return "D"
    if node.offline:
      return "O"
    return "R"

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    @rtype: list
    @return: one row per node (nodes in the order given by
        utils.NiceSort), each row holding the values of the requested
        output fields, in the requested order
    @raise errors.OpExecError: if, when not locking, some of the
        explicitly requested nodes are no longer in the configuration

    """
    node_cfg = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      names = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      names = self.wanted
      missing = set(names).difference(node_cfg.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      names = node_cfg.keys()

    names = utils.NiceSort(names)
    nodes = [node_cfg[name] for name in names]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      rpc_data = self.rpc.call_node_info(names, self.cfg.GetVGName(),
                                         self.cfg.GetHypervisorType())
      convert = utils.TryConvert
      for name in names:
        node_res = rpc_data[name]
        if node_res.fail_msg or not node_res.payload:
          # no usable answer: all dynamic fields will be None
          live_data[name] = {}
          continue
        info = node_res.payload
        entry = {"bootid": info.get('bootid', None)}
        for field, key in self._LIVE_INT_FIELDS:
          entry[field] = convert(int, info.get(key, None))
        live_data[name] = entry
    else:
      live_data = dict.fromkeys(names, {})

    primaries = dict([(name, set()) for name in names])
    secondaries = dict([(name, set()) for name in names])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      # the instance mapping is only computed if it was asked for
      for iname in self.cfg.GetInstanceList():
        inst = self.cfg.GetInstanceInfo(iname)
        if inst.primary_node in primaries:
          primaries[inst.primary_node].add(inst.name)
        for snode in inst.secondary_nodes:
          if snode in secondaries:
            secondaries[snode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    rows = []
    for node in nodes:
      row = []
      for field in self.op.output_fields:
        if field == "name":
          val = node.name
        elif field == "pinst_list":
          val = list(primaries[node.name])
        elif field == "sinst_list":
          val = list(secondaries[node.name])
        elif field == "pinst_cnt":
          val = len(primaries[node.name])
        elif field == "sinst_cnt":
          val = len(secondaries[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "serial_no":
          val = node.serial_no
        elif field == "ctime":
          val = node.ctime
        elif field == "mtime":
          val = node.mtime
        elif field == "master_candidate":
          val = node.master_candidate
        elif field == "master":
          val = node.name == master_node
        elif field == "offline":
          val = node.offline
        elif field == "drained":
          val = node.drained
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          val = self._NodeRole(node, master_node)
        else:
          raise errors.ParameterError(field)
        row.append(val)
      rows.append(row)

    return rows
2448
2449
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    """Validate the output fields and request shared node locks.

    All nodes are locked if no node names were given, otherwise only
    the nodes returned by L{_GetWantedNodes}.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      node_locks = _GetWantedNodes(self, self.op.nodes)
    else:
      node_locks = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def CheckPrereq(self):
    """Check prerequisites.

    This only records the names of the acquired node locks as the list
    of nodes to query.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the nodes and their attributes.

    Offline nodes are skipped silently, nodes whose RPC call failed are
    skipped with a warning. The volumes of each node are sorted by
    their 'dev' value.

    @rtype: list
    @return: one row per volume, each row holding the values of the
        requested output fields (converted to strings), in the
        requested order

    """
    node_names = self.nodes
    vol_data = self.rpc.call_node_volumes(node_names)

    # pairs of (instance, map of its LVs by node), in config order
    inst_lvs = []
    for iname in self.cfg.GetInstanceList():
      inst = self.cfg.GetInstanceInfo(iname)
      inst_lvs.append((inst, inst.MapLVsByNode()))

    rows = []
    for node in node_names:
      node_res = vol_data[node]
      if node_res.offline:
        continue
      err = node_res.fail_msg
      if err:
        self.LogWarning("Can't compute volume data on node %s: %s", node, err)
        continue

      for vol in sorted(node_res.payload, key=lambda entry: entry['dev']):
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            # name of the first instance owning the volume on this
            # node, or a dash if there is none
            val = '-'
            for inst, lv_map in inst_lvs:
              if node in lv_map and vol['name'] in lv_map[node]:
                val = inst.name
                break
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        rows.append(row)

    return rows
2534
2535
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    """Validate storage type and fields, then declare shared node locks.

    """
    stype = self.op.storage_type

    if stype not in constants.VALID_STORAGE_FIELDS:
      raise errors.OpPrereqError("Unknown storage type: %s" % stype)

    # the dynamic fields depend on the storage type being queried
    type_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS[stype])

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=type_fields,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      wanted = locking.ALL_SET
    else:
      wanted = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    Defaults the optional "name" opcode attribute to None and remembers
    the names of the nodes whose locks were acquired.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    @return: a list of rows, one per storage unit, with the values in
        the order of the requested output fields

    """
    requested = self.op.output_fields

    # The name is always fetched, as the rows are sorted by it
    if constants.SF_NAME not in requested:
      rpc_fields = [constants.SF_NAME] + requested
    else:
      rpc_fields = requested[:]

    # "node" is only known to the LU, so it's never asked from the nodes
    rpc_fields = [name for name in rpc_fields if name != "node"]

    # position of each field in the rows returned by the nodes
    column_of = {}
    for (idx, name) in enumerate(rpc_fields):
      column_of[name] = idx
    name_column = column_of[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, rpc_fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      # index the rows by unit name to emit them in sorted order
      by_name = dict([(row[name_column], row) for row in nresult.payload])

      for unit_name in utils.NiceSort(by_name.keys()):
        unit_row = by_name[unit_name]
        out = []

        for field in requested:
          if field == "node":
            out.append(node)
          elif field in column_of:
            out.append(unit_row[column_of[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
2629
2630
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the storage type.

    """
    expanded = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)

    self.op.node_name = expanded

    if self.op.storage_type not in constants.VALID_STORAGE_FIELDS:
      raise errors.OpPrereqError("Unknown storage type: %s" %
                                 self.op.storage_type)

  def ExpandNames(self):
    """Declare the lock on the node holding the storage unit.

    """
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the storage type supports modifications and that all
    the requested changes refer to modifiable fields.

    """
    stype = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[stype]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % stype)

    # anything requested beyond the modifiable fields is an error
    rejected = set(self.op.changes.keys()) - modifiable
    if rejected:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (stype, list(rejected)))

  def Exec(self, feedback_fn):
    """Applies the requested changes to the storage unit.

    """
    op = self.op
    st_args = _GetStorageTypeArgs(self.cfg, op.storage_type)
    result = self.rpc.call_storage_modify(op.node_name,
                                          op.storage_type, st_args,
                                          op.name, op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (op.name, op.node_name))
2682
2683
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  The same LU also handles re-adding a node already present in the
  configuration, selected via the "readd" opcode attribute.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    @return: tuple of (environment dict, pre-hook node list, post-hook
        node list)

    """
    # primary_ip and secondary_ip are stored on the opcode by CheckPrereq
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config (or, for a re-add, that
       it is in the config with unchanged IP addresses)
     - it is resolvable
     - its IP addresses don't conflict with those of other nodes
     - its parameters (single/dual homed) matches the cluster
     - it is reachable on the node daemon port

    It also decides whether the node becomes a master candidate and
    prepares the node object used by Exec.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.HostInfo(node_name)

    # from here on the name and IP provided by HostInfo are used
    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    # without an explicit secondary IP the node is treated as single-homed
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given")
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      # a re-added node is compared only against its own old entry
      if self.op.readd and node == existing_node_name:
        if (existing_node.primary_ip != primary_ip or
            existing_node.secondary_ip != secondary_ip):
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before")
        continue

      # neither IP of the new node may be in use by any other node
      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one")
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one")

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping")

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port")

    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
    # NOTE(review): presumably the exceptions list excludes the re-added
    # node's old entry from the candidate statistics -- confirm against
    # GetMasterCandidateStats
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
    # the new node will increase mc_max with one, so:
    mc_max = min(mc_max + 1, cp_size)
    self.master_candidate = mc_now < mc_max

    # a re-add reuses the configured node object, otherwise build a new one
    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    The steps are, in order: protocol version check, transfer of the ssh
    keys, optional /etc/hosts update, verification of the secondary IP,
    ssh/hostname verification from the master and finally the
    registration (or re-registration) of the node.

    Failures are signaled by raising errors.OpExecError directly or via
    the Raise method of the RPC results.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    logging.info("Copy ssh key to node %s", node)
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    keyarray = []
    # the order of the files matters, as their contents are passed
    # positionally to call_node_add below
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                priv_key, pub_key]

    for i in keyfiles:
      keyarray.append(utils.ReadFile(i))

    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                    keyarray[2],
                                    keyarray[3], keyarray[4], keyarray[5])
    result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    # for dual-homed nodes, ask the node itself whether it owns the
    # secondary IP
    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    # run the node list verification from the master against the new node
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      # a non-empty payload means the verification failed for some nodes
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed %s -> %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        # a failed demotion is only reported, it doesn't abort the re-add
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node)
2895
2896
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the requested flag changes.

    At least one of the three flags must be given, and at most one of
    them may be set to True.

    """
    expanded = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
    self.op.node_name = expanded

    for flag_name in ('master_candidate', 'offline', 'drained'):
      _CheckBooleanOpField(self.op, flag_name)

    requested = [self.op.offline, self.op.master_candidate, self.op.drained]
    if requested.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification")
    if requested.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time")

  def ExpandNames(self):
    """Declare the lock on the node being modified.

    """
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node and on the modified node.

    """
    op = self.op
    hook_nodes = [self.cfg.GetMasterNode(), op.node_name]
    env = {
      "OP_TARGET": op.node_name,
      "MASTER_CANDIDATE": str(op.master_candidate),
      "OFFLINE": str(op.offline),
      "DRAINED": str(op.drained),
      }
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the target is not the master node, that the change
    doesn't leave the cluster short of master candidates (unless forced)
    and that an offline or drained node is not promoted to master
    candidate.

    """
    op = self.op
    node = self.node = self.cfg.GetNodeInfo(op.node_name)

    flags_untouched = (op.master_candidate is None and
                       op.drained is None and
                       op.offline is None)
    # we can't change the master's node flags
    if not flags_untouched and op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("The master role can be changed"
                                 " only via masterfailover")

    # each of these requests takes the node out of the candidate pool
    drops_candidate = (op.master_candidate == False or
                       op.offline == True or
                       op.drained == True)
    if drops_candidate and node.master_candidate:
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
      if num_candidates <= cp_size:
        msg = ("Not enough master candidates (desired"
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
        if not op.force:
          raise errors.OpPrereqError(msg)
        self.LogWarning(msg)

    if op.master_candidate == True:
      # the promotion is only possible if the node is neither offline
      # nor drained, or if the same request clears that flag
      stays_offline = node.offline and op.offline != False
      stays_drained = node.drained and op.drained != False
      if stays_offline or stays_drained:
        raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                   " to master_candidate" % node.name)

  def _RequestDemotion(self, node_name):
    """Ask a node to demote itself from master candidate.

    A failure is only logged as a warning.

    """
    rrc = self.rpc.call_node_demote_from_mc(node_name)
    msg = rrc.fail_msg
    if msg:
      self.LogWarning("Node failed to demote itself: %s" % msg)

  def Exec(self, feedback_fn):
    """Modifies a node.

    @return: a list of (parameter, new value or explanation) pairs

    """
    op = self.op
    node = self.node

    changes = []
    mc_changed = False

    if op.offline is not None:
      node.offline = op.offline
      changes.append(("offline", str(op.offline)))
      if op.offline == True and node.master_candidate:
        node.master_candidate = False
        mc_changed = True
        changes.append(("master_candidate", "auto-demotion due to offline"))
      if op.offline == True and node.drained:
        node.drained = False
        changes.append(("drained", "clear drained status due to offline"))

    if op.master_candidate is not None:
      node.master_candidate = op.master_candidate
      mc_changed = True
      changes.append(("master_candidate", str(op.master_candidate)))
      if op.master_candidate == False:
        self._RequestDemotion(node.name)

    if op.drained is not None:
      node.drained = op.drained
      changes.append(("drained", str(op.drained)))
      if op.drained == True and node.master_candidate:
        node.master_candidate = False
        mc_changed = True
        changes.append(("master_candidate", "auto-demotion due to drain"))
        self._RequestDemotion(node.name)
      if op.drained == True and node.offline:
        node.offline = False
        changes.append(("offline", "clear offline status due to drain"))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node)
    # this will trigger job queue propagation or cleanup
    if mc_changed:
      self.context.ReaddNode(node)

    return changes
3030
3031
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name; the master node requires the force flag.

    """
    expanded = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
    self.op.node_name = expanded

    targets_master = (expanded == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set")

  def ExpandNames(self):
    """Locking for PowercycleNode.

    No locks are taken: this is a last-resort operation, which must not
    wait for other jobs.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the powercycle RPC call

    """
    hv_type = self.cfg.GetHypervisorType()
    result = self.rpc.call_node_powercycle(self.op.node_name, hv_type)
    result.Raise("Failed to schedule the reboot")
    return result.payload
3073
3074
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """No locks are needed for this LU.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    @return: a dictionary with version information, the architecture of
        the machine running this code and the cluster-level settings

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # only the parameters of the enabled hypervisors are reported
    hvparams = {}
    for hv_name in enabled_hvs:
      hvparams[hv_name] = cluster.hvparams[hv_name]

    arch = (platform.architecture()[0], platform.machine())

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": arch,
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      # the first enabled hypervisor is reported as the default one
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "tags": list(cluster.GetTags()),
      }
3121
3122
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    """Validate the selected fields; no locks are needed.

    """
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the values of the requested configuration fields.

    @return: a list with one value per requested field, in the order of
        the requested output fields
    @raise errors.ParameterError: for an unknown field name

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        # this must be collected like any other field; returning it
        # directly would drop the other requested fields and hand back a
        # bare value instead of a list
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values
3164
3165
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the locks for the nodes of the instance.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its primary
    node is online; a missing "ignore_size" opcode attribute is defaulted
    to False.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by _AssembleInstanceDisks

    """
    (disks_ok, disks_info) = _AssembleInstanceDisks(
      self, self.instance, ignore_size=self.op.ignore_size)
    if disks_ok:
      return disks_info

    raise errors.OpExecError("Cannot activate block devices")
3206
3207
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors in the first (secondary
      mode) pass won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a tuple of (disks_ok, device_info); disks_ok is False if the
      assembly failed on any node whose errors are not ignored, and
      device_info is a list with one
      (host, instance_visible_name, node_visible_name) entry per
      instance disk, where node_visible_name is None if the disk was
      not successfully assembled on the primary node

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        # work on a copy, so that the configured disk keeps its size
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    # Tracked per disk: only the payload of a successful assembly of
    # this very disk on the primary node may be reported, never the
    # result of a failed call or of a call made for another disk
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
3286
3287
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  If the disks can't be assembled they are shut down again and
  errors.OpExecError is raised; the force value is passed on as the
  ignore_secondaries argument of the assembly.

  """
  (disks_ok, _) = _AssembleInstanceDisks(lu, instance,
                                         ignore_secondaries=force)
  if disks_ok:
    return

  _ShutdownInstanceDisks(lu, instance)
  # the hint is shown only when force was explicitly given as false
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
3301
3302
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the locks for the nodes of the instance.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    _SafeShutdownInstanceDisks(self, self.instance)
3335
3336
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown the block devices of an instance that is not running.

  The primary node is asked for the list of instances it runs under
  the instance's hypervisor; only if our instance is not among them
  L{_ShutdownInstanceDisks} is called.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be shut down
  @raise errors.OpExecError: if the instance is reported as running

  """
  primary = instance.primary_node
  listing = lu.rpc.call_instance_list([primary], [instance.hypervisor])
  node_result = listing[primary]
  node_result.Raise("Can't contact node %s" % primary)

  running_names = node_result.payload
  if instance.name in running_names:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)
3353
3354
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If C{ignore_primary} is true, errors on the primary node are
  ignored: they are still logged as warnings, but they do not turn
  the result into a failure. Errors on any other node always do.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be shut down
  @type ignore_primary: boolean
  @param ignore_primary: whether shutdown errors on the primary node
      should leave the result untouched
  @rtype: boolean
  @return: False if a shutdown error that is not ignored was seen,
      True otherwise

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if not msg:
        continue
      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, msg)
      # only failures on the primary node can be tolerated, and only
      # when the caller asked for it
      if not ignore_primary or node != instance.primary_node:
        all_result = False
  return all_result
3376
3377
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node reports enough free memory.

  The node is queried for its information and the reported
  'memory_free' value is compared with the requested amount. If the
  node cannot be queried, does not report an integer value, or reports
  less memory than requested, an OpPrereqError is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  node_result = lu.rpc.call_node_info([node], vg_name, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node, prereq=True)

  available = node_result.payload.get('memory_free')
  if not isinstance(available, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, available))
  if available < requested:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, available))
3410
3411
class LUStartupInstance(LogicalUnit):
  """Logical unit starting an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The node list consists of the master node followed by all nodes of
    the instance; it is used for both the pre and the post phase.

    """
    env = {"FORCE": self.op.force}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def _CheckExtraBeParams(self):
    """Validate the optional backend parameters passed in the opcode.

    The (possibly empty) dict is stored as C{self.beparams}.

    """
    extra = getattr(self.op, "beparams", {})
    self.beparams = extra
    if not extra:
      return
    if not isinstance(extra, dict):
      raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                 " dict" % (type(extra), ))
    # enforce the declared types of the passed values
    utils.ForceDictType(extra, constants.BES_PARAMETER_TYPES)
    self.op.beparams = extra

  def _CheckExtraHvParams(self, instance):
    """Validate the optional hypervisor parameters passed in the opcode.

    The (possibly empty) dict is stored as C{self.hvparams}.

    """
    extra = getattr(self.op, "hvparams", {})
    self.hvparams = extra
    if not extra:
      return
    if not isinstance(extra, dict):
      raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                 " dict" % (type(extra), ))

    # check hypervisor parameter syntax (locally)
    cluster = self.cfg.GetClusterInfo()
    utils.ForceDictType(extra, constants.HVS_PARAMETER_TYPES)
    hv_name = instance.hypervisor
    merged = objects.FillDict(cluster.hvparams[hv_name], instance.hvparams)
    merged.update(extra)
    hypervisor.GetHypervisor(hv_name).CheckParameterSyntax(merged)
    _CheckHVParams(self, instance.all_nodes, hv_name, merged)
    self.op.hvparams = extra

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates the
    extra backend and hypervisor parameters, and verifies that the
    primary node is online, has the needed bridges and, if the instance
    is not reported as running there, has enough free memory.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self._CheckExtraBeParams()
    self._CheckExtraHvParams(instance)

    _CheckNodeOnline(self, instance.primary_node)

    filled_be = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    state = self.rpc.call_instance_info(instance.primary_node,
                                        instance.name,
                                        instance.hypervisor)
    state.Raise("Error checking node %s" % instance.primary_node,
                prereq=True)
    if state.payload:
      # running already, no need to look at the free memory
      return
    _CheckNodeFreeMemory(self, instance.primary_node,
                         "starting instance %s" % instance.name,
                         filled_be[constants.BE_MEMORY],
                         instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked up in the configuration before its disks
    are started; if the start RPC fails the disks are shut down again.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    start = self.rpc.call_instance_start(pnode, instance,
                                         self.hvparams, self.beparams)
    failure = start.fail_msg
    if failure:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % failure)
3510
3511
class LURebootInstance(LogicalUnit):
  """Logical unit rebooting an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def ExpandNames(self):
    """Validate the requested reboot type and lock the instance.

    """
    known_types = (constants.INSTANCE_REBOOT_SOFT,
                   constants.INSTANCE_REBOOT_HARD,
                   constants.INSTANCE_REBOOT_FULL)
    if self.op.reboot_type not in known_types:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  known_types)
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The node list consists of the master node followed by all nodes of
    the instance; it is used for both the pre and the post phase.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that its primary
    node is online and that the needed bridges exist.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    Soft and hard reboots are delegated to the primary node in a single
    call. Any other type is handled as a full reboot: the instance is
    shut down, its disks are restarted and the instance is started
    again.

    """
    instance = self.instance
    skip_secondaries = self.op.ignore_secondaries
    requested = self.op.reboot_type
    pnode = instance.primary_node

    in_place_types = (constants.INSTANCE_REBOOT_SOFT,
                      constants.INSTANCE_REBOOT_HARD)
    if requested not in in_place_types:
      stop = self.rpc.call_instance_shutdown(pnode, instance)
      stop.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, skip_secondaries)
      start = self.rpc.call_instance_start(pnode, instance, None, None)
      failure = start.fail_msg
      if failure:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % failure)
    else:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      reboot = self.rpc.call_instance_reboot(pnode, instance, requested)
      reboot.Raise("Could not reboot instance")

    self.cfg.MarkInstanceUp(instance.name)
3590
3591
class LUShutdownInstance(LogicalUnit):
  """Logical unit shutting down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The node list consists of the master node followed by all nodes of
    the instance; it is used for both the pre and the post phase.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node is online.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked down in the configuration first. A failure
    of the shutdown call is only logged as a warning, and the disks are
    shut down in either case.

    """
    instance = self.instance
    pnode = instance.primary_node
    self.cfg.MarkInstanceDown(instance.name)

    stop = self.rpc.call_instance_shutdown(pnode, instance)
    failure = stop.fail_msg
    if failure:
      self.proc.LogWarning("Could not shutdown instance: %s" % failure)

    _ShutdownInstanceDisks(self, instance)
3638
3639
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list consists of the master node followed by all nodes of
    the instance; it is used for both the pre and the post phase.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, has disks, is
    marked down and is not running on its primary node. If a new OS
    type was passed, it is also verified against the primary node.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))

    # the OS type is optional; normalize a missing attribute to None
    self.op.os_type = getattr(self.op, "os_type", None)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        # "pnode" is not among the opcode fields this LU declares or
        # reads elsewhere, so name the node via the instance object
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   instance.primary_node)
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
                   (self.op.os_type, pnode.name), prereq=True)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    If a new OS type was requested it is stored in the configuration
    first. The disks are then started, the OS create scripts are run on
    the primary node, and the disks are shut down again whether or not
    the installation succeeded.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
3722
3723
class LURecreateInstanceDisks(LogicalUnit):
  """Logical unit recreating the missing disks of an instance.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The disks parameter must be a list of non-negative integers.

    """
    requested = self.op.disks
    if not isinstance(requested, list):
      raise errors.OpPrereqError("Invalid disks parameter")
    for item in requested:
      if not (isinstance(item, int) and item >= 0):
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(item))

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The node list consists of the master node followed by all nodes of
    the instance; it is used for both the pre and the post phase.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, has disks, is
    marked down and is not running, and that every requested disk index
    exists. An empty disk list selects all disks of the instance.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)

    state = self.rpc.call_instance_info(instance.primary_node,
                                        instance.name,
                                        instance.hypervisor)
    state.Raise("Error checking node %s" % instance.primary_node,
                prereq=True)
    if state.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))

    if self.op.disks:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx)
    else:
      self.op.disks = range(len(instance.disks))

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    Every disk index that has not been requested is passed to
    L{_CreateDisks} as one to skip.

    """
    wanted = self.op.disks
    to_skip = [idx for idx, _ in enumerate(self.instance.disks)
               if idx not in wanted]

    _CreateDisks(self, self.instance, to_skip=to_skip)
3805
3806
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list consists of the master node followed by all nodes of
    the instance; it is used for both the pre and the post phase. The
    new name is exported as INSTANCE_NEW_NAME.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    It also resolves the new name, checks that no instance with that
    name exists already and, unless the opcode has a true C{ignore_ip}
    attribute, that nothing answers on the new name's IP address.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    _CheckNodeOnline(self, instance.primary_node)

    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))
    self.instance = instance

    # new name verification
    name_info = utils.HostInfo(self.op.new_name)

    # from here on the opcode carries the name as reported by HostInfo
    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name))

  def Exec(self, feedback_fn):
    """Rename the instance.

    The instance is renamed in the configuration and its lock is
    replaced by one for the new name. For file-based instances the
    storage directory is renamed on the primary node. Finally the OS
    rename script is run with the disks started; a failure of the
    script is only logged as a warning, and the disks are shut down
    again in either case.

    """
    inst = self.instance
    old_name = inst.name

    # remember the old directory before the configuration is changed
    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)
3908
3909
class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and declare its node locks for recalculation.

    The node lock list starts out empty and is filled in by
    L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list contains the master node only; it is used for both
    the pre and the post phase.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down on its primary node, its disks are
    removed, and it is dropped from the cluster configuration. If the
    opcode's C{ignore_failures} is true, a failing shutdown or disk
    removal is only reported through C{feedback_fn}.

    @raise errors.OpExecError: if the shutdown or the disk removal
        fails and failures are not ignored

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    logging.info("Removing block devices for instance %s", instance.name)

    if not _RemoveDisks(self, instance):
      if self.op.ignore_failures:
        feedback_fn("Warning: can't remove instance's disks")
      else:
        raise errors.OpExecError("Can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", instance.name)

    self.cfg.RemoveInstance(instance.name)
    # register the now obsolete instance lock for removal
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
3978
3979
3980 class LUQueryInstances(NoHooksLU):
3981   """Logical unit for querying instances.
3982
3983   """
3984   _OP_REQP = ["output_fields", "names", "use_locking"]
3985   REQ_BGL = False
3986   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
3987                                     "admin_state",
3988                                     "disk_template", "ip", "mac", "bridge",
3989                                     "nic_mode", "nic_link",
3990                                     "sda_size", "sdb_size", "vcpus", "tags",
3991                                     "network_port", "beparams",
3992                                     r"(disk)\.(size)/([0-9]+)",
3993                                     r"(disk)\.(sizes)", "disk_usage",
3994                                     r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
3995                                     r"(nic)\.(bridge)/([0-9]+)",
3996                                     r"(nic)\.(macs|ips|modes|links|bridges)",
3997                                     r"(disk|nic)\.(count)",
3998                                     "serial_no", "hypervisor", "hvparams",
3999                                     "ctime", "mtime",
4000                                     ] +
4001                                   ["hv/%s" % name
4002                                    for name in constants.HVS_PARAMETERS] +
4003                                   ["be/%s" % name
4004                                    for name in constants.BES_PARAMETERS])
4005   _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4006
4007
4008   def ExpandNames(self):
4009     _CheckOutputFields(static=self._FIELDS_STATIC,
4010                        dynamic=self._FIELDS_DYNAMIC,
4011                        selected=self.op.output_fields)
4012
4013     self.needed_locks = {}
4014     self.share_locks[locking.LEVEL_INSTANCE] = 1
4015     self.share_locks[locking.LEVEL_NODE] = 1
4016
4017     if self.op.names:
4018       self.wanted = _GetWantedInstances(self, self.op.names)
4019     else:
4020       self.wanted = locking.ALL_SET
4021
4022     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4023     self.do_locking = self.do_node_query and self.op.use_locking
4024     if self.do_locking:
4025       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4026       self.needed_locks[locking.LEVEL_NODE] = []
4027       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4028
4029   def DeclareLocks(self, level):
4030     if level == locking.LEVEL_NODE and self.do_locking:
4031       self._LockInstancesNodes()
4032
4033   def CheckPrereq(self):
4034     """Check prerequisites.
4035
4036     """
4037     pass
4038
4039   def Exec(self, feedback_fn):
4040     """Computes the list of nodes and their attributes.
4041
4042     """
4043     all_info = self.cfg.GetAllInstancesInfo()
4044     if self.wanted == locking.ALL_SET:
4045       # caller didn't specify instance names, so ordering is not important
4046       if self.do_locking:
4047         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4048       else:
4049         instance_names = all_info.keys()
4050       instance_names = utils.NiceSort(instance_names)
4051     else:
4052       # caller did specify names, so we must keep the ordering
4053       if self.do_locking:
4054         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4055       else:
4056         tgt_set = all_info.keys()
4057       missing = set(self.wanted).difference(tgt_set)
4058       if missing:
4059         raise errors.OpExecError("Some instances were removed before"
4060                                  " retrieving their data: %s" % missing)
4061       instance_names = self.wanted
4062
4063     instance_list = [all_info[iname] for iname in instance_names]
4064
4065     # begin data gathering
4066
4067     nodes = frozenset([inst.primary_node for inst in instance_list])
4068     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4069
4070     bad_nodes = []
4071     off_nodes = []
4072     if self.do_node_query:
4073       live_data = {}
4074       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4075       for name in nodes:
4076         result = node_data[name]
4077         if result.offline:
4078           # offline nodes will be in both lists
4079           off_nodes.append(name)
4080         if result.fail_msg:
4081           bad_nodes.append(name)
4082         else:
4083           if result.payload:
4084             live_data.update(result.payload)
4085           # else no instance is alive
4086     else:
4087       live_data = dict([(name, {}) for name in instance_names])
4088
4089     # end data gathering
4090
4091     HVPREFIX = "hv/"
4092     BEPREFIX = "be/"
4093     output = []
4094     cluster = self.cfg.GetClusterInfo()
4095     for instance in instance_list:
4096       iout = []
4097       i_hv = cluster.FillHV(instance)
4098       i_be = cluster.FillBE(instance)
4099       i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4100                                  nic.nicparams) for nic in instance.nics]
4101       for field in self.op.output_fields:
4102         st_match = self._FIELDS_STATIC.Matches(field)
4103         if field == "name":
4104           val = instance.name
4105         elif field == "os":
4106           val = instance.os
4107         elif field == "pnode":
4108           val = instance.primary_node
4109         elif field == "snodes":
4110           val = list(instance.secondary_nodes)
4111         elif field == "admin_state":
4112           val = instance.admin_up
4113         elif field == "oper_state":
4114           if instance.primary_node in bad_nodes:
4115             val = None
4116           else:
4117             val = bool(live_data.get(instance.name))
4118         elif field == "status":
4119           if instance.primary_node in off_nodes:
4120             val = "ERROR_nodeoffline"
4121           elif instance.primary_node in bad_nodes:
4122             val = "ERROR_nodedown"
4123           else:
4124             running = bool(live_data.get(instance.name))
4125             if running:
4126               if instance.admin_up:
4127                 val = "running"
4128               else:
4129                 val = "ERROR_up"
4130             else:
4131               if instance.admin_up:
4132                 val = "ERROR_down"
4133               else:
4134                 val = "ADMIN_down"
4135         elif field == "oper_ram":
4136           if instance.primary_node in bad_nodes:
4137             val = None
4138           elif instance.name in live_data:
4139             val = live_data[instance.name].get("memory", "?")
4140           else:
4141             val = "-"
4142         elif field == "vcpus":
4143           val = i_be[constants.BE_VCPUS]
4144         elif field == "disk_template":
4145           val = instance.disk_template
4146         elif field == "ip":
4147           if instance.nics:
4148             val = instance.nics[0].ip
4149           else:
4150             val = None
4151         elif field == "nic_mode":
4152           if instance.nics:
4153             val = i_nicp[0][constants.NIC_MODE]
4154           else:
4155             val = None
4156         elif field == "nic_link":
4157           if instance.nics:
4158             val = i_nicp[0][constants.NIC_LINK]
4159           else:
4160             val = None
4161         elif field == "bridge":
4162           if (instance.nics and
4163               i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4164             val = i_nicp[0][constants.NIC_LINK]
4165           else:
4166             val = None
4167         elif field == "mac":
4168           if instance.nics:
4169             val = instance.nics[0].mac
4170           else:
4171             val = None
4172         elif field == "sda_size" or field == "sdb_size":
4173           idx = ord(field[2]) - ord('a')
4174           try:
4175             val = instance.FindDisk(idx).size
4176           except errors.OpPrereqError:
4177             val = None
4178         elif field == "disk_usage": # total disk usage per node
4179           disk_sizes = [{'size': disk.size} for disk in instance.disks]
4180           val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4181         elif field == "tags":
4182           val = list(instance.GetTags())
4183         elif field == "serial_no":
4184           val = instance.serial_no
4185         elif field == "ctime":
4186           val = instance.ctime
4187         elif field == "mtime":
4188           val = instance.mtime
4189         elif field == "network_port":
4190           val = instance.network_port
4191         elif field == "hypervisor":
4192           val = instance.hypervisor
4193         elif field == "hvparams":
4194           val = i_hv
4195         elif (field.startswith(HVPREFIX) and
4196               field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
4197           val = i_hv.get(field[len(HVPREFIX):], None)
4198         elif field == "beparams":
4199           val = i_be
4200         elif (field.startswith(BEPREFIX) and
4201               field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4202           val = i_be.get(field[len(BEPREFIX):], None)
4203         elif st_match and st_match.groups():
4204           # matches a variable list
4205           st_groups = st_match.groups()
4206           if st_groups and st_groups[0] == "disk":
4207             if st_groups[1] == "count":
4208               val = len(instance.disks)
4209             elif st_groups[1] == "sizes":
4210               val = [disk.size for disk in instance.disks]
4211             elif st_groups[1] == "size":
4212               try:
4213                 val = instance.FindDisk(st_groups[2]).size
4214               except errors.OpPrereqError:
4215                 val = None
4216             else:
4217               assert False, "Unhandled disk parameter"
4218           elif st_groups[0] == "nic":
4219             if st_groups[1] == "count":
4220               val = len(instance.nics)
4221             elif st_groups[1] == "macs":
4222               val = [nic.mac for nic in instance.nics]
4223             elif st_groups[1] == "ips":
4224               val = [nic.ip for nic in instance.nics]
4225             elif st_groups[1] == "modes":
4226               val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4227             elif st_groups[1] == "links":
4228               val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4229             elif st_groups[1] == "bridges":
4230               val = []
4231               for nicp in i_nicp:
4232                 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4233                   val.append(nicp[constants.NIC_LINK])
4234                 else:
4235                   val.append(None)
4236             else:
4237               # index-based item
4238               nic_idx = int(st_groups[2])
4239               if nic_idx >= len(instance.nics):
4240                 val = None
4241               else:
4242                 if st_groups[1] == "mac":
4243                   val = instance.nics[nic_idx].mac
4244                 elif st_groups[1] == "ip":
4245                   val = instance.nics[nic_idx].ip
4246                 elif st_groups[1] == "mode":
4247                   val = i_nicp[nic_idx][constants.NIC_MODE]
4248                 elif st_groups[1] == "link":
4249                   val = i_nicp[nic_idx][constants.NIC_LINK]
4250                 elif st_groups[1] == "bridge":
4251                   nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4252                   if nic_mode == constants.NIC_MODE_BRIDGED:
4253                     val = i_nicp[nic_idx][constants.NIC_LINK]
4254                   else:
4255                     val = None
4256                 else:
4257                   assert False, "Unhandled NIC parameter"
4258           else:
4259             assert False, ("Declared but unhandled variable parameter '%s'" %
4260                            field)
4261         else:
4262           assert False, "Declared but unhandled parameter '%s'" % field
4263         iout.append(val)
4264       output.append(iout)
4265
4266     return output
4267
4268
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  The instance is shut down on its current primary node, the first
  secondary node is recorded as the new primary and, if the instance is
  marked as up, it is started there.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declaration.

    The node lock list starts out empty and is flagged with
    LOCKS_REPLACE; DeclareLocks calls _LockInstancesNodes once the node
    level is reached.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks when the node locking level is declared.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds IGNORE_CONSISTENCY plus the per-instance
    values from _BuildInstanceHookEnvByObject. The node list, returned
    twice, is the master node followed by the instance's secondaries.

    """
    env = {"IGNORE_CONSISTENCY": self.op.ignore_consistency}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.secondary_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must use a network-mirrored disk template; its first
    secondary node is then run through _CheckNodeOnline,
    _CheckNodeNotDrained, _CheckNodeFreeMemory (only when the instance
    is marked as up) and _CheckInstanceBridgesExist.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    filled_be = self.cfg.GetClusterInfo().FillBE(inst)
    if inst.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.")

    snodes = inst.secondary_nodes
    if not snodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    new_primary = snodes[0]
    _CheckNodeOnline(self, new_primary)
    _CheckNodeNotDrained(self, new_primary)
    if not inst.admin_up:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # the instance will be started on the secondary, so its memory
      # has to fit there
      reason = "failing over instance %s" % inst.name
      _CheckNodeFreeMemory(self, new_primary, reason,
                           filled_be[constants.BE_MEMORY], inst.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, inst, node=new_primary)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The instance is stopped on its current primary node and, when it is
    marked as up, started again on its first secondary node.

    """
    inst = self.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    feedback_fn("* checking disk consistency between source and target")
    for disk in inst.disks:
      # for drbd, these are drbd over lvm
      if _CheckDiskConsistency(self, disk, new_primary, False):
        continue
      # a degraded disk aborts the operation only for an instance that
      # is marked as up and when consistency is not being ignored
      if inst.admin_up and not self.op.ignore_consistency:
        raise errors.OpExecError("Disk %s is degraded on target node,"
                                 " aborting failover." % disk.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 inst.name, old_primary)

    shutdown_err = self.rpc.call_instance_shutdown(old_primary, inst).fail_msg
    if shutdown_err:
      if not self.op.ignore_consistency:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, old_primary, shutdown_err))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           inst.name, old_primary, old_primary, shutdown_err)

    feedback_fn("* deactivating the instance's disks on source node")
    disks_down = _ShutdownInstanceDisks(self, inst, ignore_primary=True)
    if not disks_down:
      raise errors.OpExecError("Can't shut down the instance's disks.")

    # record the new primary and distribute the updated instance config
    # to the other nodes
    inst.primary_node = new_primary
    self.cfg.Update(inst)

    # Only start the instance if it's marked as up
    if not inst.admin_up:
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s",
                 inst.name, new_primary)

    disks_ok, _dev_info = _AssembleInstanceDisks(self, inst,
                                                 ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    start_err = self.rpc.call_instance_start(new_primary, inst,
                                             None, None).fail_msg
    if start_err:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (inst.name, new_primary, start_err))
4399
4400
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  Unlike a failover, which shuts the instance down first, a migration
  is done without shutting the instance down. The work itself is
  delegated to a TLMigrateInstance tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and register the migration tasklet.

    The node lock list starts out empty and is flagged with
    LOCKS_REPLACE; DeclareLocks calls _LockInstancesNodes once the node
    level is reached.

    """
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 self.op.live, self.op.cleanup)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Compute the node locks when the node locking level is declared.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment is the per-instance one from
    _BuildInstanceHookEnvByObject plus MIGRATE_LIVE and MIGRATE_CLEANUP.
    The node list, returned twice, is the master node followed by the
    instance's secondaries.

    """
    # the instance object is the one stored on the tasklet
    inst = self._migrater.instance
    env = _BuildInstanceHookEnvByObject(self, inst)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(inst.secondary_nodes)
    return env, hook_nodes, hook_nodes
4440
4441
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its primary node, new disks are created
  on the target node, the disk data is copied over, the old disks are
  removed and, if the instance is marked as up, it is started on the
  target node.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and the (expanded) target node.

    The opcode's target_node is replaced by its expanded name.

    @raise errors.OpPrereqError: if the target node name is not known

    """
    self._ExpandAndLockInstance()
    target_node = self.cfg.ExpandNodeName(self.op.target_node)
    if target_node is None:
      raise errors.OpPrereqError("Node '%s' not known" %
                                  self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the instance's primary node to the node-level locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list, returned twice, is the master node, the instance's
    primary node and the target node.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that the target
    node differs from the current primary node and that all disks are
    of a type that can be copied (LD_LV or LD_FILE); the target node is
    then run through the online, not-drained, free memory (only when
    the instance is marked as up) and bridge helper checks.

    @raise errors.OpPrereqError: if the instance is already on the
        target node or one of its disks has an unsupported type

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node))

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        # the disk index has to be interpolated, otherwise the user
        # sees a literal "%d" in the error message
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @raise errors.OpExecError: if the shutdown fails (and this is not
        being ignored), the target disks cannot be created, the disk
        copy fails, or the instance cannot be restarted

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance)
    msg = result.fail_msg
    if msg:
      # "ignore_consistency" is not among this LU's required opcode
      # parameters (see _OP_REQP), so it may be absent from the opcode;
      # treat a missing value as False rather than hiding the shutdown
      # failure behind an AttributeError
      if getattr(self.op, "ignore_consistency", False):
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over; the first failure stops
    # the loop, the cleanup is done below
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # the copy succeeded: record the target node as the new primary
    instance.primary_node = target_node
    self.cfg.Update(instance)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
4615
4616
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  One TLMigrateInstance tasklet is created for every instance returned
  by _GetNodePrimaryInstances for the node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node name, declare the locks and build the tasklets.

    @raise errors.OpPrereqError: if the node name is not known

    """
    # keep the name as given by the user until the expansion is known to
    # have succeeded, so that the error message can report it (the
    # expansion result is None on failure)
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
    self.op.node_name = node_name

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      # the last argument is the tasklet's "cleanup" flag
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    """Compute the node locks when the node locking level is declared.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment only holds NODE_NAME; the node list, returned
    twice, contains just the master node.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)
4669
4670
4671 class TLMigrateInstance(Tasklet):
4672   def __init__(self, lu, instance_name, live, cleanup):
4673     """Initializes this class.
4674
4675     """
4676     Tasklet.__init__(self, lu)
4677
4678     # Parameters
4679     self.instance_name = instance_name
4680     self.live = live
4681     self.cleanup = cleanup
4682
4683   def CheckPrereq(self):
4684     """Check prerequisites.
4685
4686     This checks that the instance is in the cluster.
4687
4688     """
4689     instance = self.cfg.GetInstanceInfo(
4690       self.cfg.ExpandInstanceName(self.instance_name))
4691     if instance is None:
4692       raise errors.OpPrereqError("Instance '%s' not known" %
4693                                  self.instance_name)
4694
4695     if instance.disk_template != constants.DT_DRBD8:
4696       raise errors.OpPrereqError("Instance's disk layout is not"
4697                                  " drbd8, cannot migrate.")
4698
4699     secondary_nodes = instance.secondary_nodes
4700     if not secondary_nodes:
4701       raise errors.ConfigurationError("No secondary node but using"
4702                                       " drbd8 disk template")
4703
4704     i_be = self.cfg.GetClusterInfo().FillBE(instance)
4705
4706     target_node = secondary_nodes[0]
4707     # check memory requirements on the secondary node
4708     _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
4709                          instance.name, i_be[constants.BE_MEMORY],
4710                          instance.hypervisor)
4711
4712     # check bridge existance
4713     _CheckInstanceBridgesExist(self, instance, node=target_node)
4714
4715     if not self.cleanup:
4716       _CheckNodeNotDrained(self, target_node)
4717       result = self.rpc.call_instance_migratable(instance.primary_node,
4718                                                  instance)
4719       result.Raise("Can't migrate, please use failover", prereq=True)
4720
4721     self.instance = instance
4722
4723   def _WaitUntilSync(self):
4724     """Poll with custom rpc for disk sync.
4725
4726     This uses our own step-based rpc call.
4727
4728     """
4729     self.feedback_fn("* wait until resync is done")
4730     all_done = False
4731     while not all_done:
4732       all_done = True
4733       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
4734                                             self.nodes_ip,
4735                                             self.instance.disks)
4736       min_percent = 100
4737       for node, nres in result.items():
4738         nres.Raise("Cannot resync disks on node %s" % node)
4739         node_done, node_percent = nres.payload
4740         all_done = all_done and node_done
4741         if node_percent is not None:
4742           min_percent = min(min_percent, node_percent)
4743       if not all_done:
4744         if min_percent < 100:
4745           self.feedback_fn("   - progress: %.1f%%" % min_percent)
4746         time.sleep(2)
4747
4748   def _EnsureSecondary(self, node):
4749     """Demote a node to secondary.
4750
4751     """
4752     self.feedback_fn("* switching node %s to secondary mode" % node)
4753
4754     for dev in self.instance.disks:
4755       self.cfg.SetDiskID(dev, node)
4756
4757     result = self.rpc.call_blockdev_close(node, self.instance.name,
4758                                           self.instance.disks)
4759     result.Raise("Cannot change disk to secondary on node %s" % node)
4760
4761   def _GoStandalone(self):
4762     """Disconnect from the network.
4763
4764     """
4765     self.feedback_fn("* changing into standalone mode")
4766     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
4767                                                self.instance.disks)
4768     for node, nres in result.items():
4769       nres.Raise("Cannot disconnect disks node %s" % node)
4770
4771   def _GoReconnect(self, multimaster):
4772     """Reconnect to the network.
4773
4774     """
4775     if multimaster:
4776       msg = "dual-master"
4777     else:
4778       msg = "single-master"
4779     self.feedback_fn("* changing disks into %s mode" % msg)
4780     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
4781                                            self.instance.disks,
4782                                            self.instance.name, multimaster)
4783     for node, nres in result.items():
4784       nres.Raise("Cannot change disks config on node %s" % node)
4785
4786   def _ExecCleanup(self):
4787     """Try to cleanup after a failed migration.
4788
4789     The cleanup is done by:
4790       - check that the instance is running only on one node
4791         (and update the config if needed)
4792       - change disks on its secondary node to secondary
4793       - wait until disks are fully synchronized
4794       - disconnect from the network
4795       - change disks into single-master mode
4796       - wait again until disks are fully synchronized
4797
4798     """
4799     instance = self.instance
4800     target_node = self.target_node
4801     source_node = self.source_node
4802
4803     # check running on only one node
4804     self.feedback_fn("* checking where the instance actually runs"
4805                      " (if this hangs, the hypervisor might be in"
4806                      " a bad state)")
4807     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
4808     for node, result in ins_l.items():
4809       result.Raise("Can't contact node %s" % node)
4810
4811     runningon_source = instance.name in ins_l[source_node].payload
4812     runningon_target = instance.name in ins_l[target_node].payload
4813
4814     if runningon_source and runningon_target:
4815       raise errors.OpExecError("Instance seems to be running on two nodes,"
4816                                " or the hypervisor is confused. You will have"
4817                                " to ensure manually that it runs only on one"
4818                                " and restart this operation.")
4819
4820     if not (runningon_source or runningon_target):
4821       raise errors.OpExecError("Instance does not seem to be running at all."
4822                                " In this case, it's safer to repair by"
4823                                " running 'gnt-instance stop' to ensure disk"
4824                                " shutdown, and then restarting it.")
4825
4826     if runningon_target:
4827       # the migration has actually succeeded, we need to update the config
4828       self.feedback_fn("* instance running on secondary node (%s),"
4829                        " updating config" % target_node)
4830       instance.primary_node = target_node
4831       self.cfg.Update(instance)
4832       demoted_node = source_node
4833     else:
4834       self.feedback_fn("* instance confirmed to be running on its"
4835                        " primary node (%s)" % source_node)
4836       demoted_node = target_node
4837
4838     self._EnsureSecondary(demoted_node)
4839     try:
4840       self._WaitUntilSync()
4841     except errors.OpExecError:
4842       # we ignore here errors, since if the device is standalone, it
4843       # won't be able to sync
4844       pass
4845     self._GoStandalone()
4846     self._GoReconnect(False)
4847     self._WaitUntilSync()
4848
4849     self.feedback_fn("* done")
4850
4851   def _RevertDiskStatus(self):
4852     """Try to revert the disk status after a failed migration.
4853
4854     """
4855     target_node = self.target_node
4856     try:
4857       self._EnsureSecondary(target_node)
4858       self._GoStandalone()
4859       self._GoReconnect(False)
4860       self._WaitUntilSync()
4861     except errors.OpExecError, err:
4862       self.lu.LogWarning("Migration failed and I can't reconnect the"
4863                          " drives: error '%s'\n"
4864                          "Please look and recover the instance status" %
4865                          str(err))
4866
4867   def _AbortMigration(self):
4868     """Call the hypervisor code to abort a started migration.
4869
4870     """
4871     instance = self.instance
4872     target_node = self.target_node
4873     migration_info = self.migration_info
4874
4875     abort_result = self.rpc.call_finalize_migration(target_node,
4876                                                     instance,
4877                                                     migration_info,
4878                                                     False)
4879     abort_msg = abort_result.fail_msg
4880     if abort_msg:
4881       logging.error("Aborting migration failed on target node %s: %s" %
4882                     (target_node, abort_msg))
4883       # Don't raise an exception here, as we stil have to try to revert the
4884       # disk status, even if this step failed.
4885
4886   def _ExecMigration(self):
4887     """Migrate an instance.
4888
4889     The migrate is done by:
4890       - change the disks into dual-master mode
4891       - wait until disks are fully synchronized again
4892       - migrate the instance
4893       - change disks on the new secondary node (the old primary) to secondary
4894       - wait until disks are fully synchronized
4895       - change disks into single-master mode
4896
4897     """
4898     instance = self.instance
4899     target_node = self.target_node
4900     source_node = self.source_node
4901
4902     self.feedback_fn("* checking disk consistency between source and target")
4903     for dev in instance.disks:
4904       if not _CheckDiskConsistency(self, dev, target_node, False):
4905         raise errors.OpExecError("Disk %s is degraded or not fully"
4906                                  " synchronized on target node,"
4907                                  " aborting migrate." % dev.iv_name)
4908
4909     # First get the migration information from the remote node
4910     result = self.rpc.call_migration_info(source_node, instance)
4911     msg = result.fail_msg
4912     if msg:
4913       log_err = ("Failed fetching source migration information from %s: %s" %
4914                  (source_node, msg))
4915       logging.error(log_err)
4916       raise errors.OpExecError(log_err)
4917
4918     self.migration_info = migration_info = result.payload
4919
4920     # Then switch the disks to master/master mode
4921     self._EnsureSecondary(target_node)
4922     self._GoStandalone()
4923     self._GoReconnect(True)
4924     self._WaitUntilSync()
4925
4926     self.feedback_fn("* preparing %s to accept the instance" % target_node)
4927     result = self.rpc.call_accept_instance(target_node,
4928                                            instance,
4929                                            migration_info,
4930                                            self.nodes_ip[target_node])
4931
4932     msg = result.fail_msg
4933     if msg:
4934       logging.error("Instance pre-migration failed, trying to revert"
4935                     " disk status: %s", msg)
4936       self._AbortMigration()
4937       self._RevertDiskStatus()
4938       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
4939                                (instance.name, msg))
4940
4941     self.feedback_fn("* migrating instance to %s" % target_node)
4942     time.sleep(10)
4943     result = self.rpc.call_instance_migrate(source_node, instance,
4944                                             self.nodes_ip[target_node],
4945                                             self.live)
4946     msg = result.fail_msg
4947     if msg:
4948       logging.error("Instance migration failed, trying to revert"
4949                     " disk status: %s", msg)
4950       self._AbortMigration()
4951       self._RevertDiskStatus()
4952       raise errors.OpExecError("Could not migrate instance %s: %s" %
4953                                (instance.name, msg))
4954     time.sleep(10)
4955
4956     instance.primary_node = target_node
4957     # distribute new instance config to the other nodes
4958     self.cfg.Update(instance)
4959
4960     result = self.rpc.call_finalize_migration(target_node,
4961                                               instance,
4962                                               migration_info,
4963                                               True)
4964     msg = result.fail_msg
4965     if msg:
4966       logging.error("Instance migration succeeded, but finalization failed:"
4967                     " %s" % msg)
4968       raise errors.OpExecError("Could not finalize instance migration: %s" %
4969                                msg)
4970
4971     self._EnsureSecondary(source_node)
4972     self._WaitUntilSync()
4973     self._GoStandalone()
4974     self._GoReconnect(False)
4975     self._WaitUntilSync()
4976
4977     self.feedback_fn("* done")
4978
4979   def Exec(self, feedback_fn):
4980     """Perform the migration.
4981
4982     """
4983     feedback_fn("Migrating instance %s" % self.instance.name)
4984
4985     self.feedback_fn = feedback_fn
4986
4987     self.source_node = self.instance.primary_node
4988     self.target_node = self.instance.secondary_nodes[0]
4989     self.all_nodes = [self.source_node, self.target_node]
4990     self.nodes_ip = {
4991       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
4992       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
4993       }
4994
4995     if self.cleanup:
4996       return self._ExecCleanup()
4997     else:
4998       return self._ExecMigration()
4999
5000
5001 def _CreateBlockDev(lu, node, instance, device, force_create,
5002                     info, force_open):
5003   """Create a tree of block devices on a given node.
5004
5005   If this device type has to be created on secondaries, create it and
5006   all its children.
5007
5008   If not, just recurse to children keeping the same 'force' value.
5009
5010   @param lu: the lu on whose behalf we execute
5011   @param node: the node on which to create the device
5012   @type instance: L{objects.Instance}
5013   @param instance: the instance which owns the device
5014   @type device: L{objects.Disk}
5015   @param device: the device to create
5016   @type force_create: boolean
5017   @param force_create: whether to force creation of this device; this
5018       will be change to True whenever we find a device which has
5019       CreateOnSecondary() attribute
5020   @param info: the extra 'metadata' we should attach to the device
5021       (this will be represented as a LVM tag)
5022   @type force_open: boolean
5023   @param force_open: this parameter will be passes to the
5024       L{backend.BlockdevCreate} function where it specifies
5025       whether we run on primary or not, and it affects both
5026       the child assembly and the device own Open() execution
5027
5028   """
5029   if device.CreateOnSecondary():
5030     force_create = True
5031
5032   if device.children:
5033     for child in device.children:
5034       _CreateBlockDev(lu, node, instance, child, force_create,
5035                       info, force_open)
5036
5037   if not force_create:
5038     return
5039
5040   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5041
5042
5043 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5044   """Create a single block device on a given node.
5045
5046   This will not recurse over children of the device, so they must be
5047   created in advance.
5048
5049   @param lu: the lu on whose behalf we execute
5050   @param node: the node on which to create the device
5051   @type instance: L{objects.Instance}
5052   @param instance: the instance which owns the device
5053   @type device: L{objects.Disk}
5054   @param device: the device to create
5055   @param info: the extra 'metadata' we should attach to the device
5056       (this will be represented as a LVM tag)
5057   @type force_open: boolean
5058   @param force_open: this parameter will be passes to the
5059       L{backend.BlockdevCreate} function where it specifies
5060       whether we run on primary or not, and it affects both
5061       the child assembly and the device own Open() execution
5062
5063   """
5064   lu.cfg.SetDiskID(device, node)
5065   result = lu.rpc.call_blockdev_create(node, device, device.size,
5066                                        instance.name, force_open, info)
5067   result.Raise("Can't create block device %s on"
5068                " node %s for instance %s" % (device, node, instance.name))
5069   if device.physical_id is None:
5070     device.physical_id = result.payload
5071
5072
5073 def _GenerateUniqueNames(lu, exts):
5074   """Generate a suitable LV name.
5075
5076   This will generate a logical volume name for the given instance.
5077
5078   """
5079   results = []
5080   for val in exts:
5081     new_id = lu.cfg.GenerateUniqueID()
5082     results.append("%s%s" % (new_id, val))
5083   return results
5084
5085
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  The children are two logical volumes in the cluster volume group: a
  data volume of the requested size and a metadata volume of size 128.
  A port and a shared secret are allocated from the configuration for
  the drbd8 device.

  @param lu: the lu whose configuration is used for the allocations
  @param primary: the primary node, first element of the logical id
  @param secondary: the secondary node, second element of the logical id
  @param size: the size of the data volume and of the drbd8 device
  @param names: the names of the data and metadata volumes, in this order
  @param iv_name: the iv_name of the drbd8 device
  @param p_minor: the minor stored in the logical id after the port
  @param s_minor: the minor stored in the logical id after p_minor
  @return: the L{objects.Disk} describing the drbd8 device

  """
  cfg = lu.cfg
  port = cfg.AllocatePort()
  vg = cfg.GetVGName()
  secret = cfg.GenerateDRBDSecret()

  lv_children = [
    objects.Disk(dev_type=constants.LD_LV, size=size,
                 logical_id=(vg, names[0])),
    objects.Disk(dev_type=constants.LD_LV, size=128,
                 logical_id=(vg, names[1])),
    ]
  drbd_id = (primary, secondary, port, p_minor, s_minor, secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=lv_children,
                      iv_name=iv_name)
5105
5106
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  @param lu: the lu on whose behalf we execute
  @param template_name: the disk template to generate the disks for
  @param instance_name: the instance name, passed along when the DRBD
      minors are allocated
  @param primary_node: the primary node of the instance
  @param secondary_nodes: list of secondary nodes; it must be empty for
      the plain and file templates and hold exactly one node for drbd8
  @param disk_info: list of dicts with the "size" and "mode" of each disk
  @param file_storage_dir: the directory in which the disks of the file
      template are placed
  @param file_driver: the driver stored in the logical id of the disks
      of the file template
  @param base_index: the index of the first generated disk, used for
      the generated names and for the "disk/N" iv_name
  @return: list of L{objects.Disk}; empty for the diskless template
  @raise errors.ProgrammerError: for unknown templates or for a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  vg = lu.cfg.GetVGName()
  num_disks = len(disk_info)

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    lv_names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + offset)
                                         for offset in range(num_disks)])
    layout = []
    for offset, params in enumerate(disk_info):
      number = offset + base_index
      layout.append(objects.Disk(dev_type=constants.LD_LV,
                                 size=params["size"],
                                 logical_id=(vg, lv_names[offset]),
                                 iv_name="disk/%d" % number,
                                 mode=params["mode"]))
    return layout

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    peer = secondary_nodes[0]
    # two minors per disk: one for the primary and one for the peer node
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, peer] * len(disk_info), instance_name)

    prefixes = _GenerateUniqueNames(lu, [".disk%d" % (base_index + offset)
                                         for offset in range(num_disks)])
    # two volumes per disk, stored as consecutive (data, meta) entries
    lv_names = []
    for prefix in prefixes:
      lv_names.extend((prefix + "_data", prefix + "_meta"))

    layout = []
    for offset, params in enumerate(disk_info):
      number = offset + base_index
      first = offset * 2
      branch = _GenerateDRBD8Branch(lu, primary_node, peer,
                                    params["size"],
                                    lv_names[first:first + 2],
                                    "disk/%d" % number,
                                    minors[first], minors[first + 1])
      branch.mode = params["mode"]
      layout.append(branch)
    return layout

  if template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    layout = []
    for offset, params in enumerate(disk_info):
      number = offset + base_index
      layout.append(objects.Disk(dev_type=constants.LD_FILE,
                                 size=params["size"],
                                 iv_name="disk/%d" % number,
                                 logical_id=(file_driver,
                                             "%s/disk%d" % (file_storage_dir,
                                                            number)),
                                 mode=params["mode"]))
    return layout

  raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5171
5172
5173 def _GetInstanceInfoText(instance):
5174   """Compute that text that should be added to the disk's metadata.
5175
5176   """
5177   return "originstname+%s" % instance.name
5178
5179
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  For the file disk template, the storage directory is created first on
  the primary node (or on the target node, if one is given). Each disk
  is then created on all involved nodes; creation and opening are only
  forced on the primary (or target) node.

  Nothing is returned: failures are signalled through the C{Raise}
  method of the RPC results, here and in L{_CreateBlockDev}.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  """
  disk_info = _GetInstanceInfoText(instance)
  if target_node is not None:
    primary = target_node
    nodes = [primary]
  else:
    primary = instance.primary_node
    nodes = instance.all_nodes

  if instance.disk_template == constants.DT_FILE:
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(primary, storage_dir)

    dir_result.Raise("Failed to create directory '%s' on"
                     " node %s" % (storage_dir, primary))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for position, disk in enumerate(instance.disks):
    if not to_skip or position not in to_skip:
      logging.info("Creating volume %s for instance %s",
                   disk.iv_name, instance.name)
      #HARDCODE
      for node in nodes:
        on_primary = (node == primary)
        _CreateBlockDev(lu, node, instance, disk, on_primary, disk_info,
                        on_primary)
5223
5224
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  Without a target node, every disk is removed from all the nodes
  returned by its C{ComputeNodeTree} method; with a target node, the
  disks are only removed from that node. For the file disk template the
  storage directory is removed as well, from the target node if given
  and from the primary node otherwise.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal; False if any of the removal
      calls reported a failure

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        # best effort: warn and go on with the remaining devices
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = target_node or instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node which was actually contacted; it differs from
      # the primary node when a target node was given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
5272
5273
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group.

  @param disk_template: the disk template of the instance
  @param disks: list of dicts, each holding the "size" of one disk
  @return: the sum of the disk sizes for the plain template, the same
      plus 128 per disk for drbd8, and None for the diskless and file
      templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  plain_total = sum(d["size"] for d in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_total = sum(d["size"] + 128 for d in disks)

  # Required free disk space as a function of disk and swap space
  required = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
  }

  if disk_template in required:
    return required[disk_template]

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" % disk_template)
5292
5293
5294 def _CheckHVParams(lu, nodenames, hvname, hvparams):
5295   """Hypervisor parameter validation.
5296
5297   This function abstract the hypervisor parameter validation to be
5298   used in both instance create and instance modify.
5299
5300   @type lu: L{LogicalUnit}
5301   @param lu: the logical unit for which we check
5302   @type nodenames: list
5303   @param nodenames: the list of nodes on which we should check
5304   @type hvname: string
5305   @param hvname: the name of the hypervisor we should use
5306   @type hvparams: dict
5307   @param hvparams: the parameters which we need to check
5308   @raise errors.OpPrereqError: if the parameters are not valid
5309
5310   """
5311   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5312                                                   hvname,
5313                                                   hvparams)
5314   for node in nodenames:
5315     info = hvinfo[node]
5316     if info.offline:
5317       continue
5318     info.Raise("Hypervisor parameter validation failed on node %s" % node)
5319
5320
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  # hooks path and hooks type, redefined as required from LogicalUnit
  # subclasses
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  # opcode fields which this LU reads without checking for their
  # existence; presumably enforced by the LogicalUnit base class (to be
  # confirmed there)
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  # this LU does not need to hold the Big Ganeti Lock exclusively
  REQ_BGL = False
5332
5333   def _ExpandNode(self, node):
5334     """Expands and checks one node name.
5335
5336     """
5337     node_full = self.cfg.ExpandNodeName(node)
5338     if node_full is None:
5339       raise errors.OpPrereqError("Unknown node %s" % node)
5340     return node_full
5341
5342   def ExpandNames(self):
5343     """ExpandNames for CreateInstance.
5344
5345     Figure out the right locks for instance creation.
5346
5347     """
5348     self.needed_locks = {}
5349
5350     # set optional parameters to none if they don't exist
5351     for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5352       if not hasattr(self.op, attr):
5353         setattr(self.op, attr, None)
5354
5355     # cheap checks, mostly valid constants given
5356
5357     # verify creation mode
5358     if self.op.mode not in (constants.INSTANCE_CREATE,
5359                             constants.INSTANCE_IMPORT):
5360       raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5361                                  self.op.mode)
5362
5363     # disk template and mirror node verification
5364     if self.op.disk_template not in constants.DISK_TEMPLATES:
5365       raise errors.OpPrereqError("Invalid disk template name")
5366
5367     if self.op.hypervisor is None:
5368       self.op.hypervisor = self.cfg.GetHypervisorType()
5369
5370     cluster = self.cfg.GetClusterInfo()
5371     enabled_hvs = cluster.enabled_hypervisors
5372     if self.op.hypervisor not in enabled_hvs:
5373       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5374                                  " cluster (%s)" % (self.op.hypervisor,
5375                                   ",".join(enabled_hvs)))
5376
5377     # check hypervisor parameter syntax (locally)
5378     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5379     filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5380                                   self.op.hvparams)
5381     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5382     hv_type.CheckParameterSyntax(filled_hvp)
5383     self.hv_full = filled_hvp
5384
5385     # fill and remember the beparams dict
5386     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5387     self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5388                                     self.op.beparams)
5389
5390     #### instance parameters check
5391
5392     # instance name verification
5393     hostname1 = utils.HostInfo(self.op.instance_name)
5394     self.op.instance_name = instance_name = hostname1.name
5395
5396     # this is just a preventive check, but someone might still add this
5397     # instance in the meantime, and creation will fail at lock-add time
5398     if instance_name in self.cfg.GetInstanceList():
5399       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5400                                  instance_name)
5401
5402     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5403
5404     # NIC buildup
5405     self.nics = []
5406     for idx, nic in enumerate(self.op.nics):
5407       nic_mode_req = nic.get("mode", None)
5408       nic_mode = nic_mode_req
5409       if nic_mode is None:
5410         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5411
5412       # in routed mode, for the first nic, the default ip is 'auto'
5413       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5414         default_ip_mode = constants.VALUE_AUTO
5415       else:
5416         default_ip_mode = constants.VALUE_NONE
5417
5418       # ip validity checks
5419       ip = nic.get("ip", default_ip_mode)
5420       if ip is None or ip.lower() == constants.VALUE_NONE:
5421         nic_ip = None
5422       elif ip.lower() == constants.VALUE_AUTO:
5423         nic_ip = hostname1.ip
5424       else:
5425         if not utils.IsValidIP(ip):
5426           raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5427                                      " like a valid IP" % ip)
5428         nic_ip = ip
5429
5430       # TODO: check the ip for uniqueness !!
5431       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5432         raise errors.OpPrereqError("Routed nic mode requires an ip address")
5433
5434       # MAC address verification
5435       mac = nic.get("mac", constants.VALUE_AUTO)
5436       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5437         if not utils.IsValidMac(mac.lower()):
5438           raise errors.OpPrereqError("Invalid MAC address specified: %s" %
5439                                      mac)
5440         else:
5441           # or validate/reserve the current one
5442           if self.cfg.IsMacInUse(mac):
5443             raise errors.OpPrereqError("MAC address %s already in use"
5444                                        " in cluster" % mac)
5445
5446       # bridge verification
5447       bridge = nic.get("bridge", None)
5448       link = nic.get("link", None)
5449       if bridge and link:
5450         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5451                                    " at the same time")
5452       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5453         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
5454       elif bridge:
5455         link = bridge
5456
5457       nicparams = {}
5458       if nic_mode_req:
5459         nicparams[constants.NIC_MODE] = nic_mode_req
5460       if link:
5461         nicparams[constants.NIC_LINK] = link
5462
5463       check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
5464                                       nicparams)
5465       objects.NIC.CheckParameterSyntax(check_params)
5466       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
5467
5468     # disk checks/pre-build
5469     self.disks = []
5470     for disk in self.op.disks:
5471       mode = disk.get("mode", constants.DISK_RDWR)
5472       if mode not in constants.DISK_ACCESS_SET:
5473         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
5474                                    mode)
5475       size = disk.get("size", None)
5476       if size is None:
5477         raise errors.OpPrereqError("Missing disk size")
5478       try:
5479         size = int(size)
5480       except ValueError:
5481         raise errors.OpPrereqError("Invalid disk size '%s'" % size)
5482       self.disks.append({"size": size, "mode": mode})
5483
5484     # used in CheckPrereq for ip ping check
5485     self.check_ip = hostname1.ip
5486
5487     # file storage checks
5488     if (self.op.file_driver and
5489         not self.op.file_driver in constants.FILE_DRIVER):
5490       raise errors.OpPrereqError("Invalid file driver name '%s'" %
5491                                  self.op.file_driver)
5492
5493     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
5494       raise errors.OpPrereqError("File storage directory path not absolute")
5495
5496     ### Node/iallocator related checks
5497     if [self.op.iallocator, self.op.pnode].count(None) != 1:
5498       raise errors.OpPrereqError("One and only one of iallocator and primary"
5499                                  " node must be given")
5500
5501     if self.op.iallocator:
5502       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5503     else:
5504       self.op.pnode = self._ExpandNode(self.op.pnode)
5505       nodelist = [self.op.pnode]
5506       if self.op.snode is not None:
5507         self.op.snode = self._ExpandNode(self.op.snode)
5508         nodelist.append(self.op.snode)
5509       self.needed_locks[locking.LEVEL_NODE] = nodelist
5510
5511     # in case of import lock the source node too
5512     if self.op.mode == constants.INSTANCE_IMPORT:
5513       src_node = getattr(self.op, "src_node", None)
5514       src_path = getattr(self.op, "src_path", None)
5515
5516       if src_path is None:
5517         self.op.src_path = src_path = self.op.instance_name
5518
5519       if src_node is None:
5520         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5521         self.op.src_node = None
5522         if os.path.isabs(src_path):
5523           raise errors.OpPrereqError("Importing an instance from an absolute"
5524                                      " path requires a source node option.")
5525       else:
5526         self.op.src_node = src_node = self._ExpandNode(src_node)
5527         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5528           self.needed_locks[locking.LEVEL_NODE].append(src_node)
5529         if not os.path.isabs(src_path):
5530           self.op.src_path = src_path = \
5531             os.path.join(constants.EXPORT_DIR, src_path)
5532
5533     else: # INSTANCE_CREATE
5534       if getattr(self.op, "os_type", None) is None:
5535         raise errors.OpPrereqError("No guest OS specified")
5536
5537   def _RunAllocator(self):
5538     """Run the allocator based on input opcode.
5539
5540     """
5541     nics = [n.ToDict() for n in self.nics]
5542     ial = IAllocator(self.cfg, self.rpc,
5543                      mode=constants.IALLOCATOR_MODE_ALLOC,
5544                      name=self.op.instance_name,
5545                      disk_template=self.op.disk_template,
5546                      tags=[],
5547                      os=self.op.os_type,
5548                      vcpus=self.be_full[constants.BE_VCPUS],
5549                      mem_size=self.be_full[constants.BE_MEMORY],
5550                      disks=self.disks,
5551                      nics=nics,
5552                      hypervisor=self.op.hypervisor,
5553                      )
5554
5555     ial.Run(self.op.iallocator)
5556
5557     if not ial.success:
5558       raise errors.OpPrereqError("Can't compute nodes using"
5559                                  " iallocator '%s': %s" % (self.op.iallocator,
5560                                                            ial.info))
5561     if len(ial.nodes) != ial.required_nodes:
5562       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5563                                  " of nodes (%s), required %s" %
5564                                  (self.op.iallocator, len(ial.nodes),
5565                                   ial.required_nodes))
5566     self.op.pnode = ial.nodes[0]
5567     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
5568                  self.op.instance_name, self.op.iallocator,
5569                  ", ".join(ial.nodes))
5570     if ial.required_nodes == 2:
5571       self.op.snode = ial.nodes[1]
5572
5573   def BuildHooksEnv(self):
5574     """Build hooks env.
5575
5576     This runs on master, primary and secondary nodes of the instance.
5577
5578     """
5579     env = {
5580       "ADD_MODE": self.op.mode,
5581       }
5582     if self.op.mode == constants.INSTANCE_IMPORT:
5583       env["SRC_NODE"] = self.op.src_node
5584       env["SRC_PATH"] = self.op.src_path
5585       env["SRC_IMAGES"] = self.src_images
5586
5587     env.update(_BuildInstanceHookEnv(
5588       name=self.op.instance_name,
5589       primary_node=self.op.pnode,
5590       secondary_nodes=self.secondaries,
5591       status=self.op.start,
5592       os_type=self.op.os_type,
5593       memory=self.be_full[constants.BE_MEMORY],
5594       vcpus=self.be_full[constants.BE_VCPUS],
5595       nics=_NICListToTuple(self, self.nics),
5596       disk_template=self.op.disk_template,
5597       disks=[(d["size"], d["mode"]) for d in self.disks],
5598       bep=self.be_full,
5599       hvp=self.hv_full,
5600       hypervisor_name=self.op.hypervisor,
5601     ))
5602
5603     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
5604           self.secondaries)
5605     return env, nl, nl
5606
5607
5608   def CheckPrereq(self):
5609     """Check prerequisites.
5610
5611     """
5612     if (not self.cfg.GetVGName() and
5613         self.op.disk_template not in constants.DTS_NOT_LVM):
5614       raise errors.OpPrereqError("Cluster does not support lvm-based"
5615                                  " instances")
5616
5617     if self.op.mode == constants.INSTANCE_IMPORT:
5618       src_node = self.op.src_node
5619       src_path = self.op.src_path
5620
5621       if src_node is None:
5622         locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
5623         exp_list = self.rpc.call_export_list(locked_nodes)
5624         found = False
5625         for node in exp_list:
5626           if exp_list[node].fail_msg:
5627             continue
5628           if src_path in exp_list[node].payload:
5629             found = True
5630             self.op.src_node = src_node = node
5631             self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
5632                                                        src_path)
5633             break
5634         if not found:
5635           raise errors.OpPrereqError("No export found for relative path %s" %
5636                                       src_path)
5637
5638       _CheckNodeOnline(self, src_node)
5639       result = self.rpc.call_export_info(src_node, src_path)
5640       result.Raise("No export or invalid export found in dir %s" % src_path)
5641
5642       export_info = objects.SerializableConfigParser.Loads(str(result.payload))
5643       if not export_info.has_section(constants.INISECT_EXP):
5644         raise errors.ProgrammerError("Corrupted export config")
5645
5646       ei_version = export_info.get(constants.INISECT_EXP, 'version')
5647       if (int(ei_version) != constants.EXPORT_VERSION):
5648         raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
5649                                    (ei_version, constants.EXPORT_VERSION))
5650
5651       # Check that the new instance doesn't have less disks than the export
5652       instance_disks = len(self.disks)
5653       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
5654       if instance_disks < export_disks:
5655         raise errors.OpPrereqError("Not enough disks to import."
5656                                    " (instance: %d, export: %d)" %
5657                                    (instance_disks, export_disks))
5658
5659       self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
5660       disk_images = []
5661       for idx in range(export_disks):
5662         option = 'disk%d_dump' % idx
5663         if export_info.has_option(constants.INISECT_INS, option):
5664           # FIXME: are the old os-es, disk sizes, etc. useful?
5665           export_name = export_info.get(constants.INISECT_INS, option)
5666           image = os.path.join(src_path, export_name)
5667           disk_images.append(image)
5668         else:
5669           disk_images.append(False)
5670
5671       self.src_images = disk_images
5672
5673       old_name = export_info.get(constants.INISECT_INS, 'name')
5674       # FIXME: int() here could throw a ValueError on broken exports
5675       exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
5676       if self.op.instance_name == old_name:
5677         for idx, nic in enumerate(self.nics):
5678           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
5679             nic_mac_ini = 'nic%d_mac' % idx
5680             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
5681
5682     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
5683     # ip ping checks (we use the same ip that was resolved in ExpandNames)
5684     if self.op.start and not self.op.ip_check:
5685       raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
5686                                  " adding an instance in start mode")
5687
5688     if self.op.ip_check:
5689       if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
5690         raise errors.OpPrereqError("IP %s of instance %s already in use" %
5691                                    (self.check_ip, self.op.instance_name))
5692
5693     #### mac address generation
5694     # By generating here the mac address both the allocator and the hooks get
5695     # the real final mac address rather than the 'auto' or 'generate' value.
5696     # There is a race condition between the generation and the instance object
5697     # creation, which means that we know the mac is valid now, but we're not
5698     # sure it will be when we actually add the instance. If things go bad
5699     # adding the instance will abort because of a duplicate mac, and the
5700     # creation job will fail.
5701     for nic in self.nics:
5702       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5703         nic.mac = self.cfg.GenerateMAC()
5704
5705     #### allocator run
5706
5707     if self.op.iallocator is not None:
5708       self._RunAllocator()
5709
5710     #### node related checks
5711
5712     # check primary node
5713     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
5714     assert self.pnode is not None, \
5715       "Cannot retrieve locked node %s" % self.op.pnode
5716     if pnode.offline:
5717       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
5718                                  pnode.name)
5719     if pnode.drained:
5720       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
5721                                  pnode.name)
5722
5723     self.secondaries = []
5724
5725     # mirror node verification
5726     if self.op.disk_template in constants.DTS_NET_MIRROR:
5727       if self.op.snode is None:
5728         raise errors.OpPrereqError("The networked disk templates need"
5729                                    " a mirror node")
5730       if self.op.snode == pnode.name:
5731         raise errors.OpPrereqError("The secondary node cannot be"
5732                                    " the primary node.")
5733       _CheckNodeOnline(self, self.op.snode)
5734       _CheckNodeNotDrained(self, self.op.snode)
5735       self.secondaries.append(self.op.snode)
5736
5737     nodenames = [pnode.name] + self.secondaries
5738
5739     req_size = _ComputeDiskSize(self.op.disk_template,
5740                                 self.disks)
5741
5742     # Check lv size requirements
5743     if req_size is not None:
5744       nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
5745                                          self.op.hypervisor)
5746       for node in nodenames:
5747         info = nodeinfo[node]
5748         info.Raise("Cannot get current information from node %s" % node)
5749         info = info.payload
5750         vg_free = info.get('vg_free', None)
5751         if not isinstance(vg_free, int):
5752           raise errors.OpPrereqError("Can't compute free disk space on"
5753                                      " node %s" % node)
5754         if req_size > vg_free:
5755           raise errors.OpPrereqError("Not enough disk space on target node %s."
5756                                      " %d MB available, %d MB required" %
5757                                      (node, vg_free, req_size))
5758
5759     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
5760
5761     # os verification
5762     result = self.rpc.call_os_get(pnode.name, self.op.os_type)
5763     result.Raise("OS '%s' not in supported os list for primary node %s" %
5764                  (self.op.os_type, pnode.name), prereq=True)
5765
5766     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
5767
5768     # memory check on primary node
5769     if self.op.start:
5770       _CheckNodeFreeMemory(self, self.pnode.name,
5771                            "creating instance %s" % self.op.instance_name,
5772                            self.be_full[constants.BE_MEMORY],
5773                            self.op.hypervisor)
5774
5775     self.dry_run_result = list(nodenames)
5776
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    The steps are, in order: compute the disk layout, create the disks
    (reverting on failure), add the instance to the configuration,
    release the node locks that are no longer needed, wait for or check
    the disk sync status, run the OS create/import scripts and, if
    requested, start the instance.

    @param feedback_fn: callable invoked with progress message strings
    @return: a list built from C{iobj.all_nodes} of the new instance
    @raise errors.OpExecError: if the disks are found degraded after
        creation; also re-raised if the disk creation itself fails
    @raise errors.ProgrammerError: if the creation mode is neither
        create nor import (for non-diskless instances)

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    # only hypervisors listed in HTS_REQ_PORT get a port allocated
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path:
    # <cluster file storage dir>/<optional subdir>/<instance name>
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))


    # NOTE(review): the meaning of the trailing 0 argument is defined by
    # _GenerateDiskTemplate, which is not visible here
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    # the instance object is created as administratively down; admin_up is
    # only switched to True further below, right before starting it
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      # roll back whatever was created; the DRBD minors are released even if
      # the removal itself fails, and the error is then propagated
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)
    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes; in import mode the source node lock is kept since
    # the import below still uses self.op.src_node
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok);
      # a fixed 15 second pause precedes the one-shot status check
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      # undo the creation: remove the disks and the config entry
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    # diskless instances skip the OS scripts entirely
    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        # an import failure is only logged as a warning; execution continues
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      # record the new admin state in the configuration before the start RPC
      iobj.admin_up = True
      self.cfg.Update(iobj)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
5907
5908
class LUConnectConsole(NoHooksLU):
  """Build the command needed to reach an instance's console.

  The result of this LU is not a status but the command line that has
  to be run on the master node in order to connect to the console of
  the instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance object from the configuration and verifies that
    its primary node passes the online check.

    """
    self.instance = inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

  def Exec(self, feedback_fn):
    """Compute the console command for the instance.

    @param feedback_fn: unused by this LU
    @return: the value of C{self.ssh.BuildCmd} wrapping the hypervisor's
        console command for execution as root on the primary node
    @raise errors.OpExecError: if the instance is not in the list of
        instances reported by its primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    # ask only the primary node, and only for this instance's hypervisor
    running = self.rpc.call_instance_list([pnode], [inst.hypervisor])[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name not in running.payload:
      raise errors.OpExecError("Instance %s is not running." % inst.name)

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    hv_driver = hypervisor.GetHypervisor(inst.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # The filled hv/be parameters are handed over separately so that the
    # instance object itself is never edited (and the cluster defaults
    # never saved into it).
    filled_hv = cluster.FillHV(inst)
    filled_be = cluster.FillBE(inst)
    console_cmd = hv_driver.GetShellCommandForConsole(inst, filled_hv,
                                                      filled_be)

    # wrap the console command into an ssh command line
    return self.ssh.BuildCmd(pnode, "root", console_cmd, batch=True, tty=True)
5960
5961
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  The actual work is done by a single L{TLReplaceDisks} tasklet; this
  class only validates the arguments, declares the locks and builds the
  hooks environment.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional opcode fields and validate their combination.

    """
    # both fields are optional in the opcode; make sure they exist
    for optional in ("remote_node", "iallocator"):
      if not hasattr(self.op, optional):
        setattr(self.op, optional, None)

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create the replacement tasklet.

    @raise errors.OpPrereqError: if the given remote node is not known

    """
    self._ExpandAndLockInstance()

    node_level = locking.LEVEL_NODE

    if self.op.iallocator is not None:
      self.needed_locks[node_level] = locking.ALL_SET

    elif self.op.remote_node is None:
      self.needed_locks[node_level] = []
      self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

    else:
      expanded = self.cfg.ExpandNodeName(self.op.remote_node)
      if expanded is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = expanded

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[node_level] = [expanded]
      self.recalculate_locks[node_level] = constants.LOCKS_APPEND

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Declare the instance's node locks at the node level.

    Nothing is done when all nodes are already going to be locked.

    """
    if level != locking.LEVEL_NODE:
      return
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list holds the master, the instance's primary node and,
    when given, the new secondary node; the same list is returned for
    both node-list positions of the result.

    """
    inst = self.replacer.instance

    env = {}
    env["MODE"] = self.op.mode
    env["NEW_SECONDARY"] = self.op.remote_node
    env["OLD_SECONDARY"] = inst.secondary_nodes[0]
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]
    if self.op.remote_node is not None:
      hook_nodes.append(self.op.remote_node)

    return env, hook_nodes, hook_nodes
6038
6039
class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  For every instance returned by L{_GetNodeSecondaryInstances} for the
  node, one L{TLReplaceDisks} tasklet in "change secondary" mode is
  created.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional opcode fields and validate their combination.

    """
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Expand node names, declare locks and create the tasklets.

    @raise errors.OpPrereqError: if the node to evacuate or the given
        remote node is not known, or if neither an iallocator nor a
        remote node was given

    """
    # Keep the user-supplied name until the expansion is known to have
    # succeeded, so that the error message can actually show it
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)

    self.op.node_name = node_name

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters")

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      # an empty disk list is passed: disks cannot be selected in this mode
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [])
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    """Declare the instances' node locks at the node level.

    """
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list holds the master and, when given, the new secondary
    node; the same list is returned for both node-list positions of the
    result.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)
6131
6132
6133 class TLReplaceDisks(Tasklet):
6134   """Replaces disks for an instance.
6135
6136   Note: Locking is not within the scope of this class.
6137
6138   """
6139   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6140                disks):
6141     """Initializes this class.
6142
6143     """
6144     Tasklet.__init__(self, lu)
6145
6146     # Parameters
6147     self.instance_name = instance_name
6148     self.mode = mode
6149     self.iallocator_name = iallocator_name
6150     self.remote_node = remote_node
6151     self.disks = disks
6152
6153     # Runtime data
6154     self.instance = None
6155     self.new_node = None
6156     self.target_node = None
6157     self.other_node = None
6158     self.remote_node_info = None
6159     self.node_secondary_ip = None
6160
6161   @staticmethod
6162   def CheckArguments(mode, remote_node, iallocator):
6163     """Helper function for users of this class.
6164
6165     """
6166     # check for valid parameter combination
6167     if mode == constants.REPLACE_DISK_CHG:
6168       if remote_node is None and iallocator is None:
6169         raise errors.OpPrereqError("When changing the secondary either an"
6170                                    " iallocator script must be used or the"
6171                                    " new node given")
6172
6173       if remote_node is not None and iallocator is not None:
6174         raise errors.OpPrereqError("Give either the iallocator or the new"
6175                                    " secondary, not both")
6176
6177     elif remote_node is not None or iallocator is not None:
6178       # Not replacing the secondary
6179       raise errors.OpPrereqError("The iallocator and new node options can"
6180                                  " only be used when changing the"
6181                                  " secondary node")
6182
6183   @staticmethod
6184   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6185     """Compute a new secondary node using an IAllocator.
6186
6187     """
6188     ial = IAllocator(lu.cfg, lu.rpc,
6189                      mode=constants.IALLOCATOR_MODE_RELOC,
6190                      name=instance_name,
6191                      relocate_from=relocate_from)
6192
6193     ial.Run(iallocator_name)
6194
6195     if not ial.success:
6196       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6197                                  " %s" % (iallocator_name, ial.info))
6198
6199     if len(ial.nodes) != ial.required_nodes:
6200       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6201                                  " of nodes (%s), required %s" %
6202                                  (len(ial.nodes), ial.required_nodes))
6203
6204     remote_node_name = ial.nodes[0]
6205
6206     lu.LogInfo("Selected new secondary for instance '%s': %s",
6207                instance_name, remote_node_name)
6208
6209     return remote_node_name
6210
6211   def _FindFaultyDisks(self, node_name):
6212     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
6213                                     node_name, True)
6214
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that it is
    DRBD8-based and has exactly one secondary node. It then determines
    the new secondary (either the given remote node or the one chosen
    by the iallocator) and, depending on the mode, fills in
    C{target_node}, C{other_node}, C{new_node}, the list of disks to
    replace and C{node_secondary_ip}.

    @raise errors.OpPrereqError: if the instance layout is unsupported,
        the new node is already the primary or secondary node, disks
        were given in a mode which doesn't accept them, or faulty disks
        exist on both nodes in automatic mode
    @raise errors.ProgrammerError: if the mode is not handled

    """
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if self.instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances")

    if len(self.instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(self.instance.secondary_nodes))

    secondary_node = self.instance.secondary_nodes[0]

    # the new secondary is either given explicitly or computed by the
    # iallocator (relocating away from the current secondary)
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       self.instance.name, secondary_node)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    # when remote_node is None neither of the next two checks triggers
    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.")

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.")

    # explicit disk lists are rejected in automatic and change-secondary mode
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced")

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(self.instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name)

      # the node holding the faulty disks becomes the target; with no faulty
      # disks at all the disk list stays empty and no node is checked
      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = self.instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = self.instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = self.instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = self.instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        # the old secondary (target_node) is not part of check_nodes here;
        # only the new node and the primary are checked for being online
        self.new_node = remote_node
        self.other_node = self.instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid; the return value is discarded
    # (presumably FindDisk raises for an invalid index -- TODO confirm)
    for disk_idx in self.disks:
      self.instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    # nodes which are not used in the current mode are still None and skipped
    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip
6327
6328   def Exec(self, feedback_fn):
6329     """Execute disk replacement.
6330
6331     This dispatches the disk replacement to the appropriate handler.
6332
6333     """
6334     if not self.disks:
6335       feedback_fn("No disks need replacement")
6336       return
6337
6338     feedback_fn("Replacing disk(s) %s for %s" %
6339                 (", ".join([str(i) for i in self.disks]), self.instance.name))
6340
6341     activate_disks = (not self.instance.admin_up)
6342
6343     # Activate the instance disks if we're replacing them on a down instance
6344     if activate_disks:
6345       _StartInstanceDisks(self.lu, self.instance, True)
6346
6347     try:
6348       # Should we replace the secondary node?
6349       if self.new_node is not None:
6350         return self._ExecDrbd8Secondary()
6351       else:
6352         return self._ExecDrbd8DiskOnly()
6353
6354     finally:
6355       # Deactivate the instance disks if we're replacing them on a down instance
6356       if activate_disks:
6357         _SafeShutdownInstanceDisks(self.lu, self.instance)
6358
6359   def _CheckVolumeGroup(self, nodes):
6360     self.lu.LogInfo("Checking volume groups")
6361
6362     vgname = self.cfg.GetVGName()
6363
6364     # Make sure volume group exists on all involved nodes
6365     results = self.rpc.call_vg_list(nodes)
6366     if not results:
6367       raise errors.OpExecError("Can't list volume groups on the nodes")
6368
6369     for node in nodes:
6370       res = results[node]
6371       res.Raise("Error checking node %s" % node)
6372       if vgname not in res.payload:
6373         raise errors.OpExecError("Volume group '%s' not found on node %s" %
6374                                  (vgname, node))
6375
6376   def _CheckDisksExistence(self, nodes):
6377     # Check disk existence
6378     for idx, dev in enumerate(self.instance.disks):
6379       if idx not in self.disks:
6380         continue
6381
6382       for node in nodes:
6383         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
6384         self.cfg.SetDiskID(dev, node)
6385
6386         result = self.rpc.call_blockdev_find(node, dev)
6387
6388         msg = result.fail_msg
6389         if msg or not result.payload:
6390           if not msg:
6391             msg = "disk not found"
6392           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
6393                                    (idx, node, msg))
6394
6395   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
6396     for idx, dev in enumerate(self.instance.disks):
6397       if idx not in self.disks:
6398         continue
6399
6400       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
6401                       (idx, node_name))
6402
6403       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
6404                                    ldisk=ldisk):
6405         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
6406                                  " replace disks for instance %s" %
6407                                  (node_name, self.instance.name))
6408
6409   def _CreateNewStorage(self, node_name):
6410     vgname = self.cfg.GetVGName()
6411     iv_names = {}
6412
6413     for idx, dev in enumerate(self.instance.disks):
6414       if idx not in self.disks:
6415         continue
6416
6417       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
6418
6419       self.cfg.SetDiskID(dev, node_name)
6420
6421       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
6422       names = _GenerateUniqueNames(self.lu, lv_names)
6423
6424       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
6425                              logical_id=(vgname, names[0]))
6426       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6427                              logical_id=(vgname, names[1]))
6428
6429       new_lvs = [lv_data, lv_meta]
6430       old_lvs = dev.children
6431       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
6432
6433       # we pass force_create=True to force the LVM creation
6434       for new_lv in new_lvs:
6435         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
6436                         _GetInstanceInfoText(self.instance), False)
6437
6438     return iv_names
6439
6440   def _CheckDevices(self, node_name, iv_names):
6441     for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
6442       self.cfg.SetDiskID(dev, node_name)
6443
6444       result = self.rpc.call_blockdev_find(node_name, dev)
6445
6446       msg = result.fail_msg
6447       if msg or not result.payload:
6448         if not msg:
6449           msg = "disk not found"
6450         raise errors.OpExecError("Can't find DRBD device %s: %s" %
6451                                  (name, msg))
6452
6453       if result.payload.is_degraded:
6454         raise errors.OpExecError("DRBD device %s is degraded!" % name)
6455
6456   def _RemoveOldStorage(self, node_name, iv_names):
6457     for name, (dev, old_lvs, _) in iv_names.iteritems():
6458       self.lu.LogInfo("Remove logical volumes for %s" % name)
6459
6460       for lv in old_lvs:
6461         self.cfg.SetDiskID(lv, node_name)
6462
6463         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
6464         if msg:
6465           self.lu.LogWarning("Can't remove old LV: %s" % msg,
6466                              hint="remove unused LVs manually")
6467
6468   def _ExecDrbd8DiskOnly(self):
6469     """Replace a disk on the primary or secondary for DRBD 8.
6470
6471     The algorithm for replace is quite complicated:
6472
6473       1. for each disk to be replaced:
6474
6475         1. create new LVs on the target node with unique names
6476         1. detach old LVs from the drbd device
6477         1. rename old LVs to name_replaced.<time_t>
6478         1. rename new LVs to old LVs
6479         1. attach the new LVs (with the old names now) to the drbd device
6480
6481       1. wait for sync across all devices
6482
6483       1. for each modified disk:
6484
6485         1. remove old LVs (which have the name name_replaces.<time_t>)
6486
6487     Failures are not very well handled.
6488
6489     """
6490     steps_total = 6
6491
6492     # Step: check device activation
6493     self.lu.LogStep(1, steps_total, "Check device existence")
6494     self._CheckDisksExistence([self.other_node, self.target_node])
6495     self._CheckVolumeGroup([self.target_node, self.other_node])
6496
6497     # Step: check other node consistency
6498     self.lu.LogStep(2, steps_total, "Check peer consistency")
6499     self._CheckDisksConsistency(self.other_node,
6500                                 self.other_node == self.instance.primary_node,
6501                                 False)
6502
6503     # Step: create new storage
6504     self.lu.LogStep(3, steps_total, "Allocate new storage")
6505     iv_names = self._CreateNewStorage(self.target_node)
6506
6507     # Step: for each lv, detach+rename*2+attach
6508     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6509     for dev, old_lvs, new_lvs in iv_names.itervalues():
6510       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
6511
6512       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
6513                                                      old_lvs)
6514       result.Raise("Can't detach drbd from local storage on node"
6515                    " %s for device %s" % (self.target_node, dev.iv_name))
6516       #dev.children = []
6517       #cfg.Update(instance)
6518
6519       # ok, we created the new LVs, so now we know we have the needed
6520       # storage; as such, we proceed on the target node to rename
6521       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
6522       # using the assumption that logical_id == physical_id (which in
6523       # turn is the unique_id on that node)
6524
6525       # FIXME(iustin): use a better name for the replaced LVs
6526       temp_suffix = int(time.time())
6527       ren_fn = lambda d, suff: (d.physical_id[0],
6528                                 d.physical_id[1] + "_replaced-%s" % suff)
6529
6530       # Build the rename list based on what LVs exist on the node
6531       rename_old_to_new = []
6532       for to_ren in old_lvs:
6533         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
6534         if not result.fail_msg and result.payload:
6535           # device exists
6536           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
6537
6538       self.lu.LogInfo("Renaming the old LVs on the target node")
6539       result = self.rpc.call_blockdev_rename(self.target_node,
6540                                              rename_old_to_new)
6541       result.Raise("Can't rename old LVs on node %s" % self.target_node)
6542
6543       # Now we rename the new LVs to the old LVs
6544       self.lu.LogInfo("Renaming the new LVs on the target node")
6545       rename_new_to_old = [(new, old.physical_id)
6546                            for old, new in zip(old_lvs, new_lvs)]
6547       result = self.rpc.call_blockdev_rename(self.target_node,
6548                                              rename_new_to_old)
6549       result.Raise("Can't rename new LVs on node %s" % self.target_node)
6550
6551       for old, new in zip(old_lvs, new_lvs):
6552         new.logical_id = old.logical_id
6553         self.cfg.SetDiskID(new, self.target_node)
6554
6555       for disk in old_lvs:
6556         disk.logical_id = ren_fn(disk, temp_suffix)
6557         self.cfg.SetDiskID(disk, self.target_node)
6558
6559       # Now that the new lvs have the old name, we can add them to the device
6560       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
6561       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
6562                                                   new_lvs)
6563       msg = result.fail_msg
6564       if msg:
6565         for new_lv in new_lvs:
6566           msg2 = self.rpc.call_blockdev_remove(self.target_node,
6567                                                new_lv).fail_msg
6568           if msg2:
6569             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
6570                                hint=("cleanup manually the unused logical"
6571                                      "volumes"))
6572         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
6573
6574       dev.children = new_lvs
6575
6576       self.cfg.Update(self.instance)
6577
6578     # Wait for sync
6579     # This can fail as the old devices are degraded and _WaitForSync
6580     # does a combined result over all disks, so we don't check its return value
6581     self.lu.LogStep(5, steps_total, "Sync devices")
6582     _WaitForSync(self.lu, self.instance, unlock=True)
6583
6584     # Check all devices manually
6585     self._CheckDevices(self.instance.primary_node, iv_names)
6586
6587     # Step: remove old storage
6588     self.lu.LogStep(6, steps_total, "Removing old storage")
6589     self._RemoveOldStorage(self.target_node, iv_names)
6590
6591   def _ExecDrbd8Secondary(self):
6592     """Replace the secondary node for DRBD 8.
6593
6594     The algorithm for replace is quite complicated:
6595       - for all disks of the instance:
6596         - create new LVs on the new node with same names
6597         - shutdown the drbd device on the old secondary
6598         - disconnect the drbd network on the primary
6599         - create the drbd device on the new secondary
6600         - network attach the drbd on the primary, using an artifice:
6601           the drbd code for Attach() will connect to the network if it
6602           finds a device which is connected to the good local disks but
6603           not network enabled
6604       - wait for sync across all devices
6605       - remove all disks from the old secondary
6606
6607     Failures are not very well handled.
6608
6609     """
6610     steps_total = 6
6611
6612     # Step: check device activation
6613     self.lu.LogStep(1, steps_total, "Check device existence")
6614     self._CheckDisksExistence([self.instance.primary_node])
6615     self._CheckVolumeGroup([self.instance.primary_node])
6616
6617     # Step: check other node consistency
6618     self.lu.LogStep(2, steps_total, "Check peer consistency")
6619     self._CheckDisksConsistency(self.instance.primary_node, True, True)
6620
6621     # Step: create new storage
6622     self.lu.LogStep(3, steps_total, "Allocate new storage")
6623     for idx, dev in enumerate(self.instance.disks):
6624       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
6625                       (self.new_node, idx))
6626       # we pass force_create=True to force LVM creation
6627       for new_lv in dev.children:
6628         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
6629                         _GetInstanceInfoText(self.instance), False)
6630
6631     # Step 4: dbrd minors and drbd setups changes
6632     # after this, we must manually remove the drbd minors on both the
6633     # error and the success paths
6634     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6635     minors = self.cfg.AllocateDRBDMinor([self.new_node
6636                                          for dev in self.instance.disks],
6637                                         self.instance.name)
6638     logging.debug("Allocated minors %r" % (minors,))
6639
6640     iv_names = {}
6641     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
6642       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
6643                       (self.new_node, idx))
6644       # create new devices on new_node; note that we create two IDs:
6645       # one without port, so the drbd will be activated without
6646       # networking information on the new node at this stage, and one
6647       # with network, for the latter activation in step 4
6648       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
6649       if self.instance.primary_node == o_node1:
6650         p_minor = o_minor1
6651       else:
6652         p_minor = o_minor2
6653
6654       new_alone_id = (self.instance.primary_node, self.new_node, None,
6655                       p_minor, new_minor, o_secret)
6656       new_net_id = (self.instance.primary_node, self.new_node, o_port,
6657                     p_minor, new_minor, o_secret)
6658
6659       iv_names[idx] = (dev, dev.children, new_net_id)
6660       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
6661                     new_net_id)
6662       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
6663                               logical_id=new_alone_id,
6664                               children=dev.children,
6665                               size=dev.size)
6666       try:
6667         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
6668                               _GetInstanceInfoText(self.instance), False)
6669       except errors.GenericError:
6670         self.cfg.ReleaseDRBDMinors(self.instance.name)
6671         raise
6672
6673     # We have new devices, shutdown the drbd on the old secondary
6674     for idx, dev in enumerate(self.instance.disks):
6675       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
6676       self.cfg.SetDiskID(dev, self.target_node)
6677       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
6678       if msg:
6679         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
6680                            "node: %s" % (idx, msg),
6681                            hint=("Please cleanup this device manually as"
6682                                  " soon as possible"))
6683
6684     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
6685     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
6686                                                self.node_secondary_ip,
6687                                                self.instance.disks)\
6688                                               [self.instance.primary_node]
6689
6690     msg = result.fail_msg
6691     if msg:
6692       # detaches didn't succeed (unlikely)
6693       self.cfg.ReleaseDRBDMinors(self.instance.name)
6694       raise errors.OpExecError("Can't detach the disks from the network on"
6695                                " old node: %s" % (msg,))
6696
6697     # if we managed to detach at least one, we update all the disks of
6698     # the instance to point to the new secondary
6699     self.lu.LogInfo("Updating instance configuration")
6700     for dev, _, new_logical_id in iv_names.itervalues():
6701       dev.logical_id = new_logical_id
6702       self.cfg.SetDiskID(dev, self.instance.primary_node)
6703
6704     self.cfg.Update(self.instance)
6705
6706     # and now perform the drbd attach
6707     self.lu.LogInfo("Attaching primary drbds to new secondary"
6708                     " (standalone => connected)")
6709     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
6710                                             self.new_node],
6711                                            self.node_secondary_ip,
6712                                            self.instance.disks,
6713                                            self.instance.name,
6714                                            False)
6715     for to_node, to_result in result.items():
6716       msg = to_result.fail_msg
6717       if msg:
6718         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
6719                            to_node, msg,
6720                            hint=("please do a gnt-instance info to see the"
6721                                  " status of disks"))
6722
6723     # Wait for sync
6724     # This can fail as the old devices are degraded and _WaitForSync
6725     # does a combined result over all disks, so we don't check its return value
6726     self.lu.LogStep(5, steps_total, "Sync devices")
6727     _WaitForSync(self.lu, self.instance, unlock=True)
6728
6729     # Check all devices manually
6730     self._CheckDevices(self.instance.primary_node, iv_names)
6731
6732     # Step: remove old storage
6733     self.lu.LogStep(6, steps_total, "Removing old storage")
6734     self._RemoveOldStorage(self.target_node, iv_names)
6735
6736
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  Besides the required C{node_name}, the methods below also read
  C{storage_type} and C{name} from the opcode.

  """
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Replace the opcode's node name with its expanded form.

    @raise errors.OpPrereqError: if the name cannot be expanded

    """
    expanded = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)

    self.op.node_name = expanded

  def ExpandNames(self):
    """Declare the lock on the node named in the opcode.

    """
    self.needed_locks = {locking.LEVEL_NODE: [self.op.node_name]}

  def _CheckFaultyDisks(self, instance, node_name):
    """Refuse to continue if the instance has faulty disks on a node.

    @param instance: the instance object to check
    @param node_name: the node on which the instance's disks are checked
    @raise errors.OpPrereqError: if any faulty disk is reported

    """
    faulty = _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                      node_name, True)
    if faulty:
      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                 " node '%s'" % (instance.name, node_name))

  def CheckPrereq(self):
    """Check prerequisites.

    The storage type must support the fix-consistency operation, and no
    instance on the node may have faulty disks on its other nodes.

    @raise errors.OpPrereqError: if either condition is not met

    """
    storage_type = self.op.storage_type

    supported_ops = constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])
    if constants.SO_FIX_CONSISTENCY not in supported_ops:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type)

    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      # every node of the instance except the one being repaired
      for other_node in set(inst.all_nodes):
        if other_node == self.op.node_name:
          continue
        self._CheckFaultyDisks(inst, other_node)

  def Exec(self, feedback_fn):
    """Run the fix-consistency storage operation on the node.

    @param feedback_fn: callable that receives the progress message

    """
    unit_and_node = (self.op.name, self.op.node_name)
    feedback_fn("Repairing storage unit '%s' on %s ..." % unit_and_node)

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    outcome = self.rpc.call_storage_execute(self.op.node_name,
                                            self.op.storage_type, st_args,
                                            self.op.name,
                                            constants.SO_FIX_CONSISTENCY)
    outcome.Raise("Failed to repair storage unit '%s' on %s" % unit_and_node)
6791
6792
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declaration.

    The node lock list starts empty and is marked for replacement.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list used for both returned hook lists holds the master
    node and the instance's primary node.

    """
    env = {"DISK": self.op.disk, "AMOUNT": self.op.amount}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode(), self.instance.primary_node]
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    All nodes of the instance must be online, the disk template must be
    plain or drbd8, the requested disk must exist and every node must
    report at least the requested amount of free volume group space.

    @raise errors.OpPrereqError: if the template is unsupported, free
        space cannot be determined, or there is not enough of it

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    node_names = list(inst.all_nodes)
    for node_name in node_names:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    if inst.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.")

    self.disk = inst.FindDisk(self.op.disk)

    all_info = self.rpc.call_node_info(node_names, self.cfg.GetVGName(),
                                       inst.hypervisor)
    for node_name in node_names:
      node_result = all_info[node_name]
      node_result.Raise("Cannot get current information from node %s" %
                        node_name)
      free_space = node_result.payload.get('vg_free', None)
      if not isinstance(free_space, int):
        raise errors.OpPrereqError("Can't compute free disk space on"
                                   " node %s" % node_name)
      if self.op.amount > free_space:
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                   " %d MiB available, %d MiB required" %
                                   (node_name, free_space, self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    The grow request is sent to every node of the instance, then the
    new size is recorded in the configuration.

    """
    inst = self.instance
    target = self.disk
    for node_name in inst.all_nodes:
      self.cfg.SetDiskID(target, node_name)
      grow_result = self.rpc.call_blockdev_grow(node_name, target,
                                                self.op.amount)
      grow_result.Raise("Grow request failed to node %s" % node_name)
    target.RecordGrow(self.op.amount)
    self.cfg.Update(inst)
    # a failed sync is only reported, it does not fail the operation
    if self.op.wait_for_sync and not _WaitForSync(self, inst):
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")
6881
6882
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the requested instance names and declare shared locks.

    An empty instance list means that all instances are locked.

    @raise errors.OpPrereqError: if C{instances} is not a list or one
        of the names is unknown

    """
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if not self.op.instances:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
    else:
      expanded = self.wanted_names = []
      for short_name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(short_name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % short_name)
        expanded.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves the wanted names (or, if none were given, the acquired
    instance locks) into instance objects.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = []
    for inst_name in self.wanted_names:
      self.wanted_instances.append(self.cfg.GetInstanceInfo(inst_name))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    @return: None for static queries, an empty node, an offline node or
        an empty payload; otherwise a tuple of (dev_path, major, minor,
        sync_percent, estimated_time, is_degraded, ldisk_status)

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    lookup = self.rpc.call_blockdev_find(node, dev)
    if lookup.offline:
      return None

    lookup.Raise("Can't compute disk status for %s" % instance_name)

    found = lookup.payload
    if found is None:
      return None

    return (found.dev_path, found.major, found.minor,
            found.sync_percent, found.estimated_time,
            found.is_degraded, found.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    Builds a dict describing the device, its status on the primary and
    on the secondary node, and (recursively) its children.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in):
      # it is whichever end of the logical_id is not the primary
      first_end = dev.logical_id[0]
      if first_end == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = first_end

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    children_status = []
    for child in dev.children or []:
      children_status.append(self._ComputeDiskStatus(instance, snode, child))

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": children_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    cluster = self.cfg.GetClusterInfo()

    collected = {}
    for instance in self.wanted_instances:
      if self.op.static:
        # static queries never contact the nodes
        run_state = None
      else:
        info_result = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        info_result.Raise("Error checking node %s" % instance.primary_node)
        live_info = info_result.payload
        if live_info and "state" in live_info:
          run_state = "up"
        else:
          run_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disk_status = []
      for device in instance.disks:
        disk_status.append(self._ComputeDiskStatus(instance, None, device))

      collected[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": run_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disk_status,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        }

    return collected
7038
7039
7040 class LUSetInstanceParams(LogicalUnit):
7041   """Modifies an instances's parameters.
7042
7043   """
7044   HPATH = "instance-modify"
7045   HTYPE = constants.HTYPE_INSTANCE
7046   _OP_REQP = ["instance_name"]
7047   REQ_BGL = False
7048
7049   def CheckArguments(self):
7050     if not hasattr(self.op, 'nics'):
7051       self.op.nics = []
7052     if not hasattr(self.op, 'disks'):
7053       self.op.disks = []
7054     if not hasattr(self.op, 'beparams'):
7055       self.op.beparams = {}
7056     if not hasattr(self.op, 'hvparams'):
7057       self.op.hvparams = {}
7058     self.op.force = getattr(self.op, "force", False)
7059     if not (self.op.nics or self.op.disks or
7060             self.op.hvparams or self.op.beparams):
7061       raise errors.OpPrereqError("No changes submitted")
7062
7063     # Disk validation
7064     disk_addremove = 0
7065     for disk_op, disk_dict in self.op.disks:
7066       if disk_op == constants.DDM_REMOVE:
7067         disk_addremove += 1
7068         continue
7069       elif disk_op == constants.DDM_ADD:
7070         disk_addremove += 1
7071       else:
7072         if not isinstance(disk_op, int):
7073           raise errors.OpPrereqError("Invalid disk index")
7074         if not isinstance(disk_dict, dict):
7075           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7076           raise errors.OpPrereqError(msg)
7077
7078       if disk_op == constants.DDM_ADD:
7079         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7080         if mode not in constants.DISK_ACCESS_SET:
7081           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
7082         size = disk_dict.get('size', None)
7083         if size is None:
7084           raise errors.OpPrereqError("Required disk parameter size missing")
7085         try:
7086           size = int(size)
7087         except ValueError, err:
7088           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7089                                      str(err))
7090         disk_dict['size'] = size
7091       else:
7092         # modification of disk
7093         if 'size' in disk_dict:
7094           raise errors.OpPrereqError("Disk size change not possible, use"
7095                                      " grow-disk")
7096
7097     if disk_addremove > 1:
7098       raise errors.OpPrereqError("Only one disk add or remove operation"
7099                                  " supported at a time")
7100
7101     # NIC validation
7102     nic_addremove = 0
7103     for nic_op, nic_dict in self.op.nics:
7104       if nic_op == constants.DDM_REMOVE:
7105         nic_addremove += 1
7106         continue
7107       elif nic_op == constants.DDM_ADD:
7108         nic_addremove += 1
7109       else:
7110         if not isinstance(nic_op, int):
7111           raise errors.OpPrereqError("Invalid nic index")
7112         if not isinstance(nic_dict, dict):
7113           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7114           raise errors.OpPrereqError(msg)
7115
7116       # nic_dict should be a dict
7117       nic_ip = nic_dict.get('ip', None)
7118       if nic_ip is not None:
7119         if nic_ip.lower() == constants.VALUE_NONE:
7120           nic_dict['ip'] = None
7121         else:
7122           if not utils.IsValidIP(nic_ip):
7123             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
7124
7125       nic_bridge = nic_dict.get('bridge', None)
7126       nic_link = nic_dict.get('link', None)
7127       if nic_bridge and nic_link:
7128         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7129                                    " at the same time")
7130       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7131         nic_dict['bridge'] = None
7132       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7133         nic_dict['link'] = None
7134
7135       if nic_op == constants.DDM_ADD:
7136         nic_mac = nic_dict.get('mac', None)
7137         if nic_mac is None:
7138           nic_dict['mac'] = constants.VALUE_AUTO
7139
7140       if 'mac' in nic_dict:
7141         nic_mac = nic_dict['mac']
7142         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7143           if not utils.IsValidMac(nic_mac):
7144             raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
7145         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7146           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7147                                      " modifying an existing nic")
7148
7149     if nic_addremove > 1:
7150       raise errors.OpPrereqError("Only one NIC add or remove operation"
7151                                  " supported at a time")
7152
7153   def ExpandNames(self):
7154     self._ExpandAndLockInstance()
7155     self.needed_locks[locking.LEVEL_NODE] = []
7156     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7157
7158   def DeclareLocks(self, level):
7159     if level == locking.LEVEL_NODE:
7160       self._LockInstancesNodes()
7161
7162   def BuildHooksEnv(self):
7163     """Build hooks env.
7164
7165     This runs on the master, primary and secondaries.
7166
7167     """
7168     args = dict()
7169     if constants.BE_MEMORY in self.be_new:
7170       args['memory'] = self.be_new[constants.BE_MEMORY]
7171     if constants.BE_VCPUS in self.be_new:
7172       args['vcpus'] = self.be_new[constants.BE_VCPUS]
7173     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7174     # information at all.
7175     if self.op.nics:
7176       args['nics'] = []
7177       nic_override = dict(self.op.nics)
7178       c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7179       for idx, nic in enumerate(self.instance.nics):
7180         if idx in nic_override:
7181           this_nic_override = nic_override[idx]
7182         else:
7183           this_nic_override = {}
7184         if 'ip' in this_nic_override:
7185           ip = this_nic_override['ip']
7186         else:
7187           ip = nic.ip
7188         if 'mac' in this_nic_override:
7189           mac = this_nic_override['mac']
7190         else:
7191           mac = nic.mac
7192         if idx in self.nic_pnew:
7193           nicparams = self.nic_pnew[idx]
7194         else:
7195           nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7196         mode = nicparams[constants.NIC_MODE]
7197         link = nicparams[constants.NIC_LINK]
7198         args['nics'].append((ip, mac, mode, link))
7199       if constants.DDM_ADD in nic_override:
7200         ip = nic_override[constants.DDM_ADD].get('ip', None)
7201         mac = nic_override[constants.DDM_ADD]['mac']
7202         nicparams = self.nic_pnew[constants.DDM_ADD]
7203         mode = nicparams[constants.NIC_MODE]
7204         link = nicparams[constants.NIC_LINK]
7205         args['nics'].append((ip, mac, mode, link))
7206       elif constants.DDM_REMOVE in nic_override:
7207         del args['nics'][-1]
7208
7209     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7210     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7211     return env, nl, nl
7212
7213   def _GetUpdatedParams(self, old_params, update_dict,
7214                         default_values, parameter_types):
7215     """Return the new params dict for the given params.
7216
7217     @type old_params: dict
7218     @param old_params: old parameters
7219     @type update_dict: dict
7220     @param update_dict: dict containing new parameter values,
7221                         or constants.VALUE_DEFAULT to reset the
7222                         parameter to its default value
7223     @type default_values: dict
7224     @param default_values: default values for the filled parameters
7225     @type parameter_types: dict
7226     @param parameter_types: dict mapping target dict keys to types
7227                             in constants.ENFORCEABLE_TYPES
7228     @rtype: (dict, dict)
7229     @return: (new_parameters, filled_parameters)
7230
7231     """
7232     params_copy = copy.deepcopy(old_params)
7233     for key, val in update_dict.iteritems():
7234       if val == constants.VALUE_DEFAULT:
7235         try:
7236           del params_copy[key]
7237         except KeyError:
7238           pass
7239       else:
7240         params_copy[key] = val
7241     utils.ForceDictType(params_copy, parameter_types)
7242     params_filled = objects.FillDict(default_values, params_copy)
7243     return (params_copy, params_filled)
7244
7245   def CheckPrereq(self):
7246     """Check prerequisites.
7247
7248     This only checks the instance list against the existing names.
7249
7250     """
7251     self.force = self.op.force
7252
7253     # checking the new params on the primary/secondary nodes
7254
7255     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7256     cluster = self.cluster = self.cfg.GetClusterInfo()
7257     assert self.instance is not None, \
7258       "Cannot retrieve locked instance %s" % self.op.instance_name
7259     pnode = instance.primary_node
7260     nodelist = list(instance.all_nodes)
7261
7262     # hvparams processing
7263     if self.op.hvparams:
7264       i_hvdict, hv_new = self._GetUpdatedParams(
7265                              instance.hvparams, self.op.hvparams,
7266                              cluster.hvparams[instance.hypervisor],
7267                              constants.HVS_PARAMETER_TYPES)
7268       # local check
7269       hypervisor.GetHypervisor(
7270         instance.hypervisor).CheckParameterSyntax(hv_new)
7271       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
7272       self.hv_new = hv_new # the new actual values
7273       self.hv_inst = i_hvdict # the new dict (without defaults)
7274     else:
7275       self.hv_new = self.hv_inst = {}
7276
7277     # beparams processing
7278     if self.op.beparams:
7279       i_bedict, be_new = self._GetUpdatedParams(
7280                              instance.beparams, self.op.beparams,
7281                              cluster.beparams[constants.PP_DEFAULT],
7282                              constants.BES_PARAMETER_TYPES)
7283       self.be_new = be_new # the new actual values
7284       self.be_inst = i_bedict # the new dict (without defaults)
7285     else:
7286       self.be_new = self.be_inst = {}
7287
7288     self.warn = []
7289
7290     if constants.BE_MEMORY in self.op.beparams and not self.force:
7291       mem_check_list = [pnode]
7292       if be_new[constants.BE_AUTO_BALANCE]:
7293         # either we changed auto_balance to yes or it was from before
7294         mem_check_list.extend(instance.secondary_nodes)
7295       instance_info = self.rpc.call_instance_info(pnode, instance.name,
7296                                                   instance.hypervisor)
7297       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
7298                                          instance.hypervisor)
7299       pninfo = nodeinfo[pnode]
7300       msg = pninfo.fail_msg
7301       if msg:
7302         # Assume the primary node is unreachable and go ahead
7303         self.warn.append("Can't get info from primary node %s: %s" %
7304                          (pnode,  msg))
7305       elif not isinstance(pninfo.payload.get('memory_free', None), int):
7306         self.warn.append("Node data from primary node %s doesn't contain"
7307                          " free memory information" % pnode)
7308       elif instance_info.fail_msg:
7309         self.warn.append("Can't get instance runtime information: %s" %
7310                         instance_info.fail_msg)
7311       else:
7312         if instance_info.payload:
7313           current_mem = int(instance_info.payload['memory'])
7314         else:
7315           # Assume instance not running
7316           # (there is a slight race condition here, but it's not very probable,
7317           # and we have no other way to check)
7318           current_mem = 0
7319         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
7320                     pninfo.payload['memory_free'])
7321         if miss_mem > 0:
7322           raise errors.OpPrereqError("This change will prevent the instance"
7323                                      " from starting, due to %d MB of memory"
7324                                      " missing on its primary node" % miss_mem)
7325
7326       if be_new[constants.BE_AUTO_BALANCE]:
7327         for node, nres in nodeinfo.items():
7328           if node not in instance.secondary_nodes:
7329             continue
7330           msg = nres.fail_msg
7331           if msg:
7332             self.warn.append("Can't get info from secondary node %s: %s" %
7333                              (node, msg))
7334           elif not isinstance(nres.payload.get('memory_free', None), int):
7335             self.warn.append("Secondary node %s didn't return free"
7336                              " memory information" % node)
7337           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
7338             self.warn.append("Not enough memory to failover instance to"
7339                              " secondary node %s" % node)
7340
7341     # NIC processing
7342     self.nic_pnew = {}
7343     self.nic_pinst = {}
7344     for nic_op, nic_dict in self.op.nics:
7345       if nic_op == constants.DDM_REMOVE:
7346         if not instance.nics:
7347           raise errors.OpPrereqError("Instance has no NICs, cannot remove")
7348         continue
7349       if nic_op != constants.DDM_ADD:
7350         # an existing nic
7351         if nic_op < 0 or nic_op >= len(instance.nics):
7352           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
7353                                      " are 0 to %d" %
7354                                      (nic_op, len(instance.nics)))
7355         old_nic_params = instance.nics[nic_op].nicparams
7356         old_nic_ip = instance.nics[nic_op].ip
7357       else:
7358         old_nic_params = {}
7359         old_nic_ip = None
7360
7361       update_params_dict = dict([(key, nic_dict[key])
7362                                  for key in constants.NICS_PARAMETERS
7363                                  if key in nic_dict])
7364
7365       if 'bridge' in nic_dict:
7366         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
7367
7368       new_nic_params, new_filled_nic_params = \
7369           self._GetUpdatedParams(old_nic_params, update_params_dict,
7370                                  cluster.nicparams[constants.PP_DEFAULT],
7371                                  constants.NICS_PARAMETER_TYPES)
7372       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
7373       self.nic_pinst[nic_op] = new_nic_params
7374       self.nic_pnew[nic_op] = new_filled_nic_params
7375       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
7376
7377       if new_nic_mode == constants.NIC_MODE_BRIDGED:
7378         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
7379         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
7380         if msg:
7381           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
7382           if self.force:
7383             self.warn.append(msg)
7384           else:
7385             raise errors.OpPrereqError(msg)
7386       if new_nic_mode == constants.NIC_MODE_ROUTED:
7387         if 'ip' in nic_dict:
7388           nic_ip = nic_dict['ip']
7389         else:
7390           nic_ip = old_nic_ip
7391         if nic_ip is None:
7392           raise errors.OpPrereqError('Cannot set the nic ip to None'
7393                                      ' on a routed nic')
7394       if 'mac' in nic_dict:
7395         nic_mac = nic_dict['mac']
7396         if nic_mac is None:
7397           raise errors.OpPrereqError('Cannot set the nic mac to None')
7398         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7399           # otherwise generate the mac
7400           nic_dict['mac'] = self.cfg.GenerateMAC()
7401         else:
7402           # or validate/reserve the current one
7403           if self.cfg.IsMacInUse(nic_mac):
7404             raise errors.OpPrereqError("MAC address %s already in use"
7405                                        " in cluster" % nic_mac)
7406
7407     # DISK processing
7408     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
7409       raise errors.OpPrereqError("Disk operations not supported for"
7410                                  " diskless instances")
7411     for disk_op, disk_dict in self.op.disks:
7412       if disk_op == constants.DDM_REMOVE:
7413         if len(instance.disks) == 1:
7414           raise errors.OpPrereqError("Cannot remove the last disk of"
7415                                      " an instance")
7416         ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
7417         ins_l = ins_l[pnode]
7418         msg = ins_l.fail_msg
7419         if msg:
7420           raise errors.OpPrereqError("Can't contact node %s: %s" %
7421                                      (pnode, msg))
7422         if instance.name in ins_l.payload:
7423           raise errors.OpPrereqError("Instance is running, can't remove"
7424                                      " disks.")
7425
7426       if (disk_op == constants.DDM_ADD and
7427           len(instance.nics) >= constants.MAX_DISKS):
7428         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
7429                                    " add more" % constants.MAX_DISKS)
7430       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
7431         # an existing disk
7432         if disk_op < 0 or disk_op >= len(instance.disks):
7433           raise errors.OpPrereqError("Invalid disk index %s, valid values"
7434                                      " are 0 to %d" %
7435                                      (disk_op, len(instance.disks)))
7436
7437     return
7438
7439   def Exec(self, feedback_fn):
7440     """Modifies an instance.
7441
7442     All parameters take effect only at the next restart of the instance.
7443
7444     """
7445     # Process here the warnings from CheckPrereq, as we don't have a
7446     # feedback_fn there.
7447     for warn in self.warn:
7448       feedback_fn("WARNING: %s" % warn)
7449
7450     result = []
7451     instance = self.instance
7452     cluster = self.cluster
7453     # disk changes
7454     for disk_op, disk_dict in self.op.disks:
7455       if disk_op == constants.DDM_REMOVE:
7456         # remove the last disk
7457         device = instance.disks.pop()
7458         device_idx = len(instance.disks)
7459         for node, disk in device.ComputeNodeTree(instance.primary_node):
7460           self.cfg.SetDiskID(disk, node)
7461           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
7462           if msg:
7463             self.LogWarning("Could not remove disk/%d on node %s: %s,"
7464                             " continuing anyway", device_idx, node, msg)
7465         result.append(("disk/%d" % device_idx, "remove"))
7466       elif disk_op == constants.DDM_ADD:
7467         # add a new disk
7468         if instance.disk_template == constants.DT_FILE:
7469           file_driver, file_path = instance.disks[0].logical_id
7470           file_path = os.path.dirname(file_path)
7471         else:
7472           file_driver = file_path = None
7473         disk_idx_base = len(instance.disks)
7474         new_disk = _GenerateDiskTemplate(self,
7475                                          instance.disk_template,
7476                                          instance.name, instance.primary_node,
7477                                          instance.secondary_nodes,
7478                                          [disk_dict],
7479                                          file_path,
7480                                          file_driver,
7481                                          disk_idx_base)[0]
7482         instance.disks.append(new_disk)
7483         info = _GetInstanceInfoText(instance)
7484
7485         logging.info("Creating volume %s for instance %s",
7486                      new_disk.iv_name, instance.name)
7487         # Note: this needs to be kept in sync with _CreateDisks
7488         #HARDCODE
7489         for node in instance.all_nodes:
7490           f_create = node == instance.primary_node
7491           try:
7492             _CreateBlockDev(self, node, instance, new_disk,
7493                             f_create, info, f_create)
7494           except errors.OpExecError, err:
7495             self.LogWarning("Failed to create volume %s (%s) on"
7496                             " node %s: %s",
7497                             new_disk.iv_name, new_disk, node, err)
7498         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
7499                        (new_disk.size, new_disk.mode)))
7500       else:
7501         # change a given disk
7502         instance.disks[disk_op].mode = disk_dict['mode']
7503         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
7504     # NIC changes
7505     for nic_op, nic_dict in self.op.nics:
7506       if nic_op == constants.DDM_REMOVE:
7507         # remove the last nic
7508         del instance.nics[-1]
7509         result.append(("nic.%d" % len(instance.nics), "remove"))
7510       elif nic_op == constants.DDM_ADD:
7511         # mac and bridge should be set, by now
7512         mac = nic_dict['mac']
7513         ip = nic_dict.get('ip', None)
7514         nicparams = self.nic_pinst[constants.DDM_ADD]
7515         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
7516         instance.nics.append(new_nic)
7517         result.append(("nic.%d" % (len(instance.nics) - 1),
7518                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
7519                        (new_nic.mac, new_nic.ip,
7520                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
7521                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
7522                        )))
7523       else:
7524         for key in 'mac', 'ip':
7525           if key in nic_dict:
7526             setattr(instance.nics[nic_op], key, nic_dict[key])
7527         if nic_op in self.nic_pnew:
7528           instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
7529         for key, val in nic_dict.iteritems():
7530           result.append(("nic.%s/%d" % (key, nic_op), val))
7531
7532     # hvparams changes
7533     if self.op.hvparams:
7534       instance.hvparams = self.hv_inst
7535       for key, val in self.op.hvparams.iteritems():
7536         result.append(("hv/%s" % key, val))
7537
7538     # beparams changes
7539     if self.op.beparams:
7540       instance.beparams = self.be_inst
7541       for key, val in self.op.beparams.iteritems():
7542         result.append(("be/%s" % key, val))
7543
7544     self.cfg.Update(instance)
7545
7546     return result
7547
7548
class LUQueryExports(NoHooksLU):
  """Query the exports present on a set of nodes.

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    """Declare shared locks on the requested nodes, or on all of them.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted_nodes = _GetWantedNodes(self, self.op.nodes)
    else:
      # no node given means querying the whole cluster
      wanted_nodes = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This only records the names of the nodes which were locked.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node, or False if the node could not be queried.

    """
    exports = {}
    for node, nres in self.rpc.call_export_list(self.nodes).items():
      if nres.fail_msg:
        exports[node] = False
      else:
        exports[node] = nres.payload

    return exports
7589
7590
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  The disks of the instance are snapshotted on its primary node, the
  snapshots are copied to the target node and the export is finalized
  there; exports of the same instance found on other nodes are then
  removed.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the needed locks: the instance ones and all the nodes.

    """
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    @return: tuple of (environment dict, node list, node list); the same
        node list is returned twice

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    It also stores the instance and the target node objects in
    C{self.instance} and C{self.dst_node}, and refuses instances having
    file-based disks.

    @raise errors.OpPrereqError: if the target node name is wrong or the
        instance has a file-based disk

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.dst_node = self.cfg.GetNodeInfo(
      self.cfg.ExpandNodeName(self.op.target_node))

    if self.dst_node is None:
      # This is wrong node name, not a non-locked node
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks")

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    @param feedback_fn: function used for reporting the progress
    @rtype: tuple
    @return: (fin_resu, dresults), where fin_resu tells whether the
        finalization of the export succeeded and dresults is a list
        with one boolean per disk, telling whether the disk was exported
    @raise errors.OpExecError: if the instance was shut down for the
        export and could not be started again

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    # one entry per instance disk: the snapshot device, or False if the
    # snapshot could not be created
    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    # per-disk results
    dresults = []
    try:
      for idx, disk in enumerate(instance.disks):
        feedback_fn("Creating a snapshot of disk/%s on node %s" %
                    (idx, src_node))

        # result.payload will be a snapshot of an lvm leaf of the one we passed
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                          idx, src_node, msg)
          snap_disks.append(False)
        else:
          disk_id = (vgname, result.payload)
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                 logical_id=disk_id, physical_id=disk_id,
                                 iv_name=disk.iv_name)
          snap_disks.append(new_dev)

    finally:
      # the instance is restarted as soon as the snapshots are done (or
      # have failed), but only if it was shut down by us and is marked up
      if self.op.shutdown and instance.admin_up:
        feedback_fn("Starting instance %s" % instance.name)
        result = self.rpc.call_instance_start(src_node, instance, None, None)
        msg = result.fail_msg
        if msg:
          _ShutdownInstanceDisks(self, instance)
          raise errors.OpExecError("Could not start instance: %s" % msg)

    # TODO: check for size

    cluster_name = self.cfg.GetClusterName()
    for idx, dev in enumerate(snap_disks):
      feedback_fn("Exporting snapshot %s from %s to %s" %
                  (idx, src_node, dst_node.name))
      if dev:
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                               instance, cluster_name, idx)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not export disk/%s from node %s to"
                          " node %s: %s", idx, src_node, dst_node.name, msg)
          dresults.append(False)
        else:
          dresults.append(True)
        # the snapshot is removed whether or not the export succeeded
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
        if msg:
          self.LogWarning("Could not remove snapshot for disk/%d from node"
                          " %s: %s", idx, src_node, msg)
      else:
        # no snapshot was created for this disk
        dresults.append(False)

    feedback_fn("Finalizing export on %s" % dst_node.name)
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
    fin_resu = True
    msg = result.fail_msg
    if msg:
      self.LogWarning("Could not finalize export for instance %s"
                      " on node %s: %s", instance.name, dst_node.name, msg)
      fin_resu = False

    # all the nodes except the target one are searched for older exports
    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          # nodes which can't be queried are silently skipped
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
    return fin_resu, dresults
7763
7764
class LURemoveExport(NoHooksLU):
  """Remove the exports of the named instance from all the nodes.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks on all the nodes.

    """
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing needs to be checked for this LU.

    """

  def Exec(self, feedback_fn):
    """Remove any export.

    Nodes which can't be queried are skipped with a warning; failures
    in removing an export are only logged.

    """
    given_name = self.op.instance_name
    instance_name = self.cfg.ExpandInstanceName(given_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = not instance_name
    if fqdn_warn:
      instance_name = given_name

    node_names = self.acquired_locks[locking.LEVEL_NODE]
    found = False
    for node, nres in self.rpc.call_export_list(node_names).items():
      query_err = nres.fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_err)
        continue
      if instance_name not in nres.payload:
        continue
      found = True
      remove_err = self.rpc.call_export_remove(node, instance_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node, remove_err)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
7816
7817
class TagsLU(NoHooksLU):
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.
  It expands and locks the name of the tagged object and looks up the
  object itself, storing it in C{self.target}.

  """

  def ExpandNames(self):
    """Expand the name of the tagged node or instance and lock it.

    Nothing is locked for the other tag kinds.

    """
    self.needed_locks = {}
    kind = self.op.kind
    if kind == constants.TAG_NODE:
      expand_fn = self.cfg.ExpandNodeName
      lock_level = locking.LEVEL_NODE
      err_fmt = "Invalid node name (%s)"
    elif kind == constants.TAG_INSTANCE:
      expand_fn = self.cfg.ExpandInstanceName
      lock_level = locking.LEVEL_INSTANCE
      err_fmt = "Invalid instance name (%s)"
    else:
      # no name to expand and no lock to take (invalid kinds are
      # rejected by TagsLU.CheckPrereq)
      return

    full_name = expand_fn(self.op.name)
    if full_name is None:
      raise errors.OpPrereqError(err_fmt % (self.op.name,))
    self.op.name = full_name
    self.needed_locks[lock_level] = full_name

  def CheckPrereq(self):
    """Check prerequisites.

    This looks up the object the tags belong to.

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))
    self.target = target
7855
7856
class LUGetTags(TagsLU):
  """Return the tags set on a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Return the tags of the target object, as a list.

    """
    current_tags = self.target.GetTags()
    return list(current_tags)
7869
7870
7871 class LUSearchTags(NoHooksLU):
7872   """Searches the tags for a given pattern.
7873
7874   """
7875   _OP_REQP = ["pattern"]
7876   REQ_BGL = False
7877
7878   def ExpandNames(self):
7879     self.needed_locks = {}
7880
7881   def CheckPrereq(self):
7882     """Check prerequisites.
7883
7884     This checks the pattern passed for validity by compiling it.
7885
7886     """
7887     try:
7888       self.re = re.compile(self.op.pattern)
7889     except re.error, err:
7890       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
7891                                  (self.op.pattern, err))
7892
7893   def Exec(self, feedback_fn):
7894     """Returns the tag list.
7895
7896     """
7897     cfg = self.cfg
7898     tgts = [("/cluster", cfg.GetClusterInfo())]
7899     ilist = cfg.GetAllInstancesInfo().values()
7900     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
7901     nlist = cfg.GetAllNodesInfo().values()
7902     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
7903     results = []
7904     for path, target in tgts:
7905       for tag in target.GetTags():
7906         if self.re.search(tag):
7907           results.append((path, tag))
7908     return results
7909
7910
7911 class LUAddTags(TagsLU):
7912   """Sets a tag on a given object.
7913
7914   """
7915   _OP_REQP = ["kind", "name", "tags"]
7916   REQ_BGL = False
7917
7918   def CheckPrereq(self):
7919     """Check prerequisites.
7920
7921     This checks the type and length of the tag name and value.
7922
7923     """
7924     TagsLU.CheckPrereq(self)
7925     for tag in self.op.tags:
7926       objects.TaggableObject.ValidateTag(tag)
7927
7928   def Exec(self, feedback_fn):
7929     """Sets the tag.
7930
7931     """
7932     try:
7933       for tag in self.op.tags:
7934         self.target.AddTag(tag)
7935     except errors.TagError, err:
7936       raise errors.OpExecError("Error while setting tag: %s" % str(err))
7937     try:
7938       self.cfg.Update(self.target)
7939     except errors.ConfigurationError:
7940       raise errors.OpRetryError("There has been a modification to the"
7941                                 " config file and the operation has been"
7942                                 " aborted. Please retry.")
7943
7944
class LUDelTags(TagsLU):
  """Remove a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This validates the given tags and checks that all of them are
    currently set on the target object.

    """
    TagsLU.CheckPrereq(self)
    validate_fn = objects.TaggableObject.ValidateTag
    for old_tag in self.op.tags:
      validate_fn(old_tag)

    wanted = frozenset(self.op.tags)
    present = self.target.GetTags()
    if wanted <= present:
      return

    # report all the missing tags at once, in a stable order
    missing = sorted(["'%s'" % tag for tag in wanted - present])
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (",".join(missing)))

  def Exec(self, feedback_fn):
    """Remove the tags from the target object and save it.

    """
    tagged = self.target
    for old_tag in self.op.tags:
      tagged.RemoveTag(old_tag)

    try:
      self.cfg.Update(tagged)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")
7982
7983
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  The sleep is done on the master and/or on the given nodes, depending
  on the opcode parameters.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    The node list, if any, is expanded and the resulting nodes are
    locked.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return
    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    node_names = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = node_names
    self.needed_locks[locking.LEVEL_NODE] = node_names

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    The master delay, if requested, is executed before the node one.

    """
    if self.op.on_master and not utils.TestDelay(self.op.duration):
      raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      node_results = self.rpc.call_test_delay(self.op.on_nodes,
                                              self.op.duration)
      for node, nres in node_results.items():
        nres.Raise("Failure during rpc call to node %s" % node)
8024
8025
8026 class IAllocator(object):
8027   """IAllocator framework.
8028
8029   An IAllocator instance has three sets of attributes:
8030     - cfg that is needed to query the cluster
8031     - input data (all members of the _KEYS class attribute are required)
8032     - four buffer attributes (in|out_data|text), that represent the
8033       input (to the external script) in text and data structure format,
8034       and the output from it, again in two formats
8035     - the result variables from the script (success, info, nodes) for
8036       easy usage
8037
8038   """
8039   _ALLO_KEYS = [
8040     "mem_size", "disks", "disk_template",
8041     "os", "tags", "nics", "vcpus", "hypervisor",
8042     ]
8043   _RELO_KEYS = [
8044     "relocate_from",
8045     ]
8046
8047   def __init__(self, cfg, rpc, mode, name, **kwargs):
8048     self.cfg = cfg
8049     self.rpc = rpc
8050     # init buffer variables
8051     self.in_text = self.out_text = self.in_data = self.out_data = None
8052     # init all input fields so that pylint is happy
8053     self.mode = mode
8054     self.name = name
8055     self.mem_size = self.disks = self.disk_template = None
8056     self.os = self.tags = self.nics = self.vcpus = None
8057     self.hypervisor = None
8058     self.relocate_from = None
8059     # computed fields
8060     self.required_nodes = None
8061     # init result fields
8062     self.success = self.info = self.nodes = None
8063     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8064       keyset = self._ALLO_KEYS
8065     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8066       keyset = self._RELO_KEYS
8067     else:
8068       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8069                                    " IAllocator" % self.mode)
8070     for key in kwargs:
8071       if key not in keyset:
8072         raise errors.ProgrammerError("Invalid input parameter '%s' to"
8073                                      " IAllocator" % key)
8074       setattr(self, key, kwargs[key])
8075     for key in keyset:
8076       if key not in kwargs:
8077         raise errors.ProgrammerError("Missing input parameter '%s' to"
8078                                      " IAllocator" % key)
8079     self._BuildInputData()
8080
8081   def _ComputeClusterData(self):
8082     """Compute the generic allocator input data.
8083
8084     This is the data that is independent of the actual operation.
8085
8086     """
8087     cfg = self.cfg
8088     cluster_info = cfg.GetClusterInfo()
8089     # cluster data
8090     data = {
8091       "version": constants.IALLOCATOR_VERSION,
8092       "cluster_name": cfg.GetClusterName(),
8093       "cluster_tags": list(cluster_info.GetTags()),
8094       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8095       # we don't have job IDs
8096       }
8097     iinfo = cfg.GetAllInstancesInfo().values()
8098     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8099
8100     # node data
8101     node_results = {}
8102     node_list = cfg.GetNodeList()
8103
8104     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8105       hypervisor_name = self.hypervisor
8106     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8107       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
8108
8109     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
8110                                         hypervisor_name)
8111     node_iinfo = \
8112       self.rpc.call_all_instances_info(node_list,
8113                                        cluster_info.enabled_hypervisors)
8114     for nname, nresult in node_data.items():
8115       # first fill in static (config-based) values
8116       ninfo = cfg.GetNodeInfo(nname)
8117       pnr = {
8118         "tags": list(ninfo.GetTags()),
8119         "primary_ip": ninfo.primary_ip,
8120         "secondary_ip": ninfo.secondary_ip,
8121         "offline": ninfo.offline,
8122         "drained": ninfo.drained,
8123         "master_candidate": ninfo.master_candidate,
8124         }
8125
8126       if not (ninfo.offline or ninfo.drained):
8127         nresult.Raise("Can't get data for node %s" % nname)
8128         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
8129                                 nname)
8130         remote_info = nresult.payload
8131
8132         for attr in ['memory_total', 'memory_free', 'memory_dom0',
8133                      'vg_size', 'vg_free', 'cpu_total']:
8134           if attr not in remote_info:
8135             raise errors.OpExecError("Node '%s' didn't return attribute"
8136                                      " '%s'" % (nname, attr))
8137           if not isinstance(remote_info[attr], int):
8138             raise errors.OpExecError("Node '%s' returned invalid value"
8139                                      " for '%s': %s" %
8140                                      (nname, attr, remote_info[attr]))
8141         # compute memory used by primary instances
8142         i_p_mem = i_p_up_mem = 0
8143         for iinfo, beinfo in i_list:
8144           if iinfo.primary_node == nname:
8145             i_p_mem += beinfo[constants.BE_MEMORY]
8146             if iinfo.name not in node_iinfo[nname].payload:
8147               i_used_mem = 0
8148             else:
8149               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
8150             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
8151             remote_info['memory_free'] -= max(0, i_mem_diff)
8152
8153             if iinfo.admin_up:
8154               i_p_up_mem += beinfo[constants.BE_MEMORY]
8155
8156         # compute memory used by instances
8157         pnr_dyn = {
8158           "total_memory": remote_info['memory_total'],
8159           "reserved_memory": remote_info['memory_dom0'],
8160           "free_memory": remote_info['memory_free'],
8161           "total_disk": remote_info['vg_size'],
8162           "free_disk": remote_info['vg_free'],
8163           "total_cpus": remote_info['cpu_total'],
8164           "i_pri_memory": i_p_mem,
8165           "i_pri_up_memory": i_p_up_mem,
8166           }
8167         pnr.update(pnr_dyn)
8168
8169       node_results[nname] = pnr
8170     data["nodes"] = node_results
8171
8172     # instance data
8173     instance_data = {}
8174     for iinfo, beinfo in i_list:
8175       nic_data = []
8176       for nic in iinfo.nics:
8177         filled_params = objects.FillDict(
8178             cluster_info.nicparams[constants.PP_DEFAULT],
8179             nic.nicparams)
8180         nic_dict = {"mac": nic.mac,
8181                     "ip": nic.ip,
8182                     "mode": filled_params[constants.NIC_MODE],
8183                     "link": filled_params[constants.NIC_LINK],
8184                    }
8185         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
8186           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
8187         nic_data.append(nic_dict)
8188       pir = {
8189         "tags": list(iinfo.GetTags()),
8190         "admin_up": iinfo.admin_up,
8191         "vcpus": beinfo[constants.BE_VCPUS],
8192         "memory": beinfo[constants.BE_MEMORY],
8193         "os": iinfo.os,
8194         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
8195         "nics": nic_data,
8196         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
8197         "disk_template": iinfo.disk_template,
8198         "hypervisor": iinfo.hypervisor,
8199         }
8200       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
8201                                                  pir["disks"])
8202       instance_data[iinfo.name] = pir
8203
8204     data["instances"] = instance_data
8205
8206     self.in_data = data
8207
8208   def _AddNewInstance(self):
8209     """Add new instance data to allocator structure.
8210
8211     This in combination with _AllocatorGetClusterData will create the
8212     correct structure needed as input for the allocator.
8213
8214     The checks for the completeness of the opcode must have already been
8215     done.
8216
8217     """
8218     data = self.in_data
8219
8220     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
8221
8222     if self.disk_template in constants.DTS_NET_MIRROR:
8223       self.required_nodes = 2
8224     else:
8225       self.required_nodes = 1
8226     request = {
8227       "type": "allocate",
8228       "name": self.name,
8229       "disk_template": self.disk_template,
8230       "tags": self.tags,
8231       "os": self.os,
8232       "vcpus": self.vcpus,
8233       "memory": self.mem_size,
8234       "disks": self.disks,
8235       "disk_space_total": disk_space,
8236       "nics": self.nics,
8237       "required_nodes": self.required_nodes,
8238       }
8239     data["request"] = request
8240
8241   def _AddRelocateInstance(self):
8242     """Add relocate instance data to allocator structure.
8243
8244     This in combination with _IAllocatorGetClusterData will create the
8245     correct structure needed as input for the allocator.
8246
8247     The checks for the completeness of the opcode must have already been
8248     done.
8249
8250     """
8251     instance = self.cfg.GetInstanceInfo(self.name)
8252     if instance is None:
8253       raise errors.ProgrammerError("Unknown instance '%s' passed to"
8254                                    " IAllocator" % self.name)
8255
8256     if instance.disk_template not in constants.DTS_NET_MIRROR:
8257       raise errors.OpPrereqError("Can't relocate non-mirrored instances")
8258
8259     if len(instance.secondary_nodes) != 1:
8260       raise errors.OpPrereqError("Instance has not exactly one secondary node")
8261
8262     self.required_nodes = 1
8263     disk_sizes = [{'size': disk.size} for disk in instance.disks]
8264     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
8265
8266     request = {
8267       "type": "relocate",
8268       "name": self.name,
8269       "disk_space_total": disk_space,
8270       "required_nodes": self.required_nodes,
8271       "relocate_from": self.relocate_from,
8272       }
8273     self.in_data["request"] = request
8274
8275   def _BuildInputData(self):
8276     """Build input data structures.
8277
8278     """
8279     self._ComputeClusterData()
8280
8281     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8282       self._AddNewInstance()
8283     else:
8284       self._AddRelocateInstance()
8285
8286     self.in_text = serializer.Dump(self.in_data)
8287
8288   def Run(self, name, validate=True, call_fn=None):
8289     """Run an instance allocator and return the results.
8290
8291     """
8292     if call_fn is None:
8293       call_fn = self.rpc.call_iallocator_runner
8294
8295     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
8296     result.Raise("Failure while running the iallocator script")
8297
8298     self.out_text = result.payload
8299     if validate:
8300       self._ValidateResult()
8301
8302   def _ValidateResult(self):
8303     """Process the allocator results.
8304
8305     This will process and if successful save the result in
8306     self.out_data and the other parameters.
8307
8308     """
8309     try:
8310       rdict = serializer.Load(self.out_text)
8311     except Exception, err:
8312       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
8313
8314     if not isinstance(rdict, dict):
8315       raise errors.OpExecError("Can't parse iallocator results: not a dict")
8316
8317     for key in "success", "info", "nodes":
8318       if key not in rdict:
8319         raise errors.OpExecError("Can't parse iallocator results:"
8320                                  " missing key '%s'" % key)
8321       setattr(self, key, rdict[key])
8322
8323     if not isinstance(rdict["nodes"], list):
8324       raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
8325                                " is not a list")
8326     self.out_data = rdict
8327
8328
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  Depending on the requested direction, this LU either returns the input
  text generated for the allocator, or runs the named allocator and
  returns its raw output.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    @raise errors.OpPrereqError: if a parameter is missing or invalid

    """
    op = self.op
    if op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ("name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"):
        if not hasattr(op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      existing = self.cfg.ExpandInstanceName(op.name)
      if existing is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   existing)
      if not isinstance(op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for nic in op.nics:
        if not (isinstance(nic, dict) and
                "mac" in nic and
                "ip" in nic and
                "bridge" in nic):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for disk in op.disks:
        if not (isinstance(disk, dict) and
                "size" in disk and
                isinstance(disk["size"], int) and
                "mode" in disk and
                disk["mode"] in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      # fall back to the value returned by the configuration when no
      # hypervisor was given
      if getattr(op, "hypervisor", None) is None:
        op.hypervisor = self.cfg.GetHypervisorType()
    elif op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      full_name = self.cfg.ExpandInstanceName(op.name)
      if full_name is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   op.name)
      op.name = full_name
      self.relocate_from = self.cfg.GetInstanceInfo(full_name).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 op.mode)

    if op.direction == constants.IALLOCATOR_DIR_OUT:
      if getattr(op, "allocator", None) is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @return: the generated allocator input text when the direction is
        constants.IALLOCATOR_DIR_IN, otherwise the unvalidated output
        text of the allocator run

    """
    op = self.op
    if op.mode == constants.IALLOCATOR_MODE_ALLOC:
      extra = {
        "mem_size": op.mem_size,
        "disks": op.disks,
        "disk_template": op.disk_template,
        "os": op.os,
        "tags": op.tags,
        "nics": op.nics,
        "vcpus": op.vcpus,
        "hypervisor": op.hypervisor,
        }
    else:
      extra = {"relocate_from": list(self.relocate_from)}
    ial = IAllocator(self.cfg, self.rpc, op.mode, op.name, **extra)

    if op.direction == constants.IALLOCATOR_DIR_IN:
      return ial.in_text
    ial.Run(op.allocator, validate=False)
    return ial.out_text