code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 import os
  30 import os.path
  31 import time
  32 import re
  33 import platform
  34 import logging
  35 import copy
  36 import OpenSSL
  37
  38 from ganeti import ssh
  39 from ganeti import utils
  40 from ganeti import errors
  41 from ganeti import hypervisor
  42 from ganeti import locking
  43 from ganeti import constants
  44 from ganeti import objects
  45 from ganeti import serializer
  46 from ganeti import ssconf
  47
  48
  49 class LogicalUnit(object):
  50   """Logical Unit base class.
  51
  52   Subclasses must follow these rules:
  53     - implement ExpandNames
  54     - implement CheckPrereq (except when tasklets are used)
  55     - implement Exec (except when tasklets are used)
  56     - implement BuildHooksEnv
  57     - redefine HPATH and HTYPE
  58     - optionally redefine their run requirements:
  59         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  60
  61   Note that all commands require root permissions.
  62
  63   @ivar dry_run_result: the value (if any) that will be returned to the caller
  64       in dry-run mode (signalled by opcode dry_run parameter)
  65
  66   """
  67   HPATH = None
  68   HTYPE = None
  69   _OP_REQP = []
  70   REQ_BGL = True
  71
  72   def __init__(self, processor, op, context, rpc):
  73     """Constructor for LogicalUnit.
  74
  75     This needs to be overridden in derived classes in order to check op
  76     validity.
  77
  78     """
  79     self.proc = processor
  80     self.op = op
  81     self.cfg = context.cfg
  82     self.context = context
  83     self.rpc = rpc
  84     # Dicts used to declare locking needs to mcpu
  85     self.needed_locks = None
  86     self.acquired_locks = {}
  87     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
  88     self.add_locks = {}
  89     self.remove_locks = {}
  90     # Used to force good behavior when calling helper functions
  91     self.recalculate_locks = {}
  92     self.__ssh = None
  93     # logging
  94     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
  95     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
  96     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
  97     # support for dry-run
  98     self.dry_run_result = None
  99     # support for generic debug attribute
 100     if (not hasattr(self.op, "debug_level") or
 101         not isinstance(self.op.debug_level, int)):
 102       self.op.debug_level = 0
 103
 104     # Tasklets
 105     self.tasklets = None
 106
 107     for attr_name in self._OP_REQP:
 108       attr_val = getattr(op, attr_name, None)
 109       if attr_val is None:
 110         raise errors.OpPrereqError("Required parameter '%s' missing" %
 111                                    attr_name, errors.ECODE_INVAL)
 112
 113     self.CheckArguments()
 114
 115   def __GetSSH(self):
 116     """Returns the SshRunner object
 117
 118     """
 119     if not self.__ssh:
 120       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 121     return self.__ssh
 122
 123   ssh = property(fget=__GetSSH)
 124
 125   def CheckArguments(self):
 126     """Check syntactic validity for the opcode arguments.
 127
 128     This method is for doing a simple syntactic check and ensure
 129     validity of opcode parameters, without any cluster-related
 130     checks. While the same can be accomplished in ExpandNames and/or
 131     CheckPrereq, doing these separate is better because:
 132
 133       - ExpandNames is left as as purely a lock-related function
 134       - CheckPrereq is run after we have acquired locks (and possible
 135         waited for them)
 136
 137     The function is allowed to change the self.op attribute so that
 138     later methods can no longer worry about missing parameters.
 139
 140     """
 141     pass
 142
 143   def ExpandNames(self):
 144     """Expand names for this LU.
 145
 146     This method is called before starting to execute the opcode, and it should
 147     update all the parameters of the opcode to their canonical form (e.g. a
 148     short node name must be fully expanded after this method has successfully
 149     completed). This way locking, hooks, logging, ecc. can work correctly.
 150
 151     LUs which implement this method must also populate the self.needed_locks
 152     member, as a dict with lock levels as keys, and a list of needed lock names
 153     as values. Rules:
 154
 155       - use an empty dict if you don't need any lock
 156       - if you don't need any lock at a particular level omit that level
 157       - don't put anything for the BGL level
 158       - if you want all locks at a level use locking.ALL_SET as a value
 159
 160     If you need to share locks (rather than acquire them exclusively) at one
 161     level you can modify self.share_locks, setting a true value (usually 1) for
 162     that level. By default locks are not shared.
 163
 164     This function can also define a list of tasklets, which then will be
 165     executed in order instead of the usual LU-level CheckPrereq and Exec
 166     functions, if those are not defined by the LU.
 167
 168     Examples::
 169
 170       # Acquire all nodes and one instance
 171       self.needed_locks = {
 172         locking.LEVEL_NODE: locking.ALL_SET,
 173         locking.LEVEL_INSTANCE: ['instance1.example.tld'],
 174       }
 175       # Acquire just two nodes
 176       self.needed_locks = {
 177         locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
 178       }
 179       # Acquire no locks
 180       self.needed_locks = {} # No, you can't leave it to the default value None
 181
 182     """
 183     # The implementation of this method is mandatory only if the new LU is
 184     # concurrent, so that old LUs don't need to be changed all at the same
 185     # time.
 186     if self.REQ_BGL:
 187       self.needed_locks = {} # Exclusive LUs don't need locks.
 188     else:
 189       raise NotImplementedError
 190
 191   def DeclareLocks(self, level):
 192     """Declare LU locking needs for a level
 193
 194     While most LUs can just declare their locking needs at ExpandNames time,
 195     sometimes there's the need to calculate some locks after having acquired
 196     the ones before. This function is called just before acquiring locks at a
 197     particular level, but after acquiring the ones at lower levels, and permits
 198     such calculations. It can be used to modify self.needed_locks, and by
 199     default it does nothing.
 200
 201     This function is only called if you have something already set in
 202     self.needed_locks for the level.
 203
 204     @param level: Locking level which is going to be locked
 205     @type level: member of ganeti.locking.LEVELS
 206
 207     """
 208
 209   def CheckPrereq(self):
 210     """Check prerequisites for this LU.
 211
 212     This method should check that the prerequisites for the execution
 213     of this LU are fulfilled. It can do internode communication, but
 214     it should be idempotent - no cluster or system changes are
 215     allowed.
 216
 217     The method should raise errors.OpPrereqError in case something is
 218     not fulfilled. Its return value is ignored.
 219
 220     This method should also update all the parameters of the opcode to
 221     their canonical form if it hasn't been done by ExpandNames before.
 222
 223     """
 224     if self.tasklets is not None:
 225       for (idx, tl) in enumerate(self.tasklets):
 226         logging.debug("Checking prerequisites for tasklet %s/%s",
 227                       idx + 1, len(self.tasklets))
 228         tl.CheckPrereq()
 229     else:
 230       raise NotImplementedError
 231
 232   def Exec(self, feedback_fn):
 233     """Execute the LU.
 234
 235     This method should implement the actual work. It should raise
 236     errors.OpExecError for failures that are somewhat dealt with in
 237     code, or expected.
 238
 239     """
 240     if self.tasklets is not None:
 241       for (idx, tl) in enumerate(self.tasklets):
 242         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 243         tl.Exec(feedback_fn)
 244     else:
 245       raise NotImplementedError
 246
 247   def BuildHooksEnv(self):
 248     """Build hooks environment for this LU.
 249
 250     This method should return a three-node tuple consisting of: a dict
 251     containing the environment that will be used for running the
 252     specific hook for this LU, a list of node names on which the hook
 253     should run before the execution, and a list of node names on which
 254     the hook should run after the execution.
 255
 256     The keys of the dict must not have 'GANETI_' prefixed as this will
 257     be handled in the hooks runner. Also note additional keys will be
 258     added by the hooks runner. If the LU doesn't define any
 259     environment, an empty dict (and not None) should be returned.
 260
 261     No nodes should be returned as an empty list (and not None).
 262
 263     Note that if the HPATH for a LU class is None, this function will
 264     not be called.
 265
 266     """
 267     raise NotImplementedError
 268
 269   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 270     """Notify the LU about the results of its hooks.
 271
 272     This method is called every time a hooks phase is executed, and notifies
 273     the Logical Unit about the hooks' result. The LU can then use it to alter
 274     its result based on the hooks.  By default the method does nothing and the
 275     previous result is passed back unchanged but any LU can define it if it
 276     wants to use the local cluster hook-scripts somehow.
 277
 278     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 279         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 280     @param hook_results: the results of the multi-node hooks rpc call
 281     @param feedback_fn: function used send feedback back to the caller
 282     @param lu_result: the previous Exec result this LU had, or None
 283         in the PRE phase
 284     @return: the new Exec result, based on the previous result
 285         and hook results
 286
 287     """
 288     # API must be kept, thus we ignore the unused argument and could
 289     # be a function warnings
 290     # pylint: disable-msg=W0613,R0201
 291     return lu_result
 292
 293   def _ExpandAndLockInstance(self):
 294     """Helper function to expand and lock an instance.
 295
 296     Many LUs that work on an instance take its name in self.op.instance_name
 297     and need to expand it and then declare the expanded name for locking. This
 298     function does it, and then updates self.op.instance_name to the expanded
 299     name. It also initializes needed_locks as a dict, if this hasn't been done
 300     before.
 301
 302     """
 303     if self.needed_locks is None:
 304       self.needed_locks = {}
 305     else:
 306       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 307         "_ExpandAndLockInstance called with instance-level locks set"
 308     self.op.instance_name = _ExpandInstanceName(self.cfg,
 309                                                 self.op.instance_name)
 310     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 311
 312   def _LockInstancesNodes(self, primary_only=False):
 313     """Helper function to declare instances' nodes for locking.
 314
 315     This function should be called after locking one or more instances to lock
 316     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 317     with all primary or secondary nodes for instances already locked and
 318     present in self.needed_locks[locking.LEVEL_INSTANCE].
 319
 320     It should be called from DeclareLocks, and for safety only works if
 321     self.recalculate_locks[locking.LEVEL_NODE] is set.
 322
 323     In the future it may grow parameters to just lock some instance's nodes, or
 324     to just lock primaries or secondary nodes, if needed.
 325
 326     If should be called in DeclareLocks in a way similar to::
 327
 328       if level == locking.LEVEL_NODE:
 329         self._LockInstancesNodes()
 330
 331     @type primary_only: boolean
 332     @param primary_only: only lock primary nodes of locked instances
 333
 334     """
 335     assert locking.LEVEL_NODE in self.recalculate_locks, \
 336       "_LockInstancesNodes helper function called with no nodes to recalculate"
 337
 338     # TODO: check if we're really been called with the instance locks held
 339
 340     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 341     # future we might want to have different behaviors depending on the value
 342     # of self.recalculate_locks[locking.LEVEL_NODE]
 343     wanted_nodes = []
 344     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 345       instance = self.context.cfg.GetInstanceInfo(instance_name)
 346       wanted_nodes.append(instance.primary_node)
 347       if not primary_only:
 348         wanted_nodes.extend(instance.secondary_nodes)
 349
 350     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 351       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 352     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 353       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 354
 355     del self.recalculate_locks[locking.LEVEL_NODE]
 356
 357
 358 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 359   """Simple LU which runs no hooks.
 360
 361   This LU is intended as a parent for other LogicalUnits which will
 362   run no hooks, in order to reduce duplicate code.
 363
 364   """
 365   HPATH = None
 366   HTYPE = None
 367
 368   def BuildHooksEnv(self):
 369     """Empty BuildHooksEnv for NoHooksLu.
 370
 371     This just raises an error.
 372
 373     """
 374     assert False, "BuildHooksEnv called for NoHooksLUs"
 375
 376
 377 class Tasklet:
 378   """Tasklet base class.
 379
 380   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 381   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 382   tasklets know nothing about locks.
 383
 384   Subclasses must follow these rules:
 385     - Implement CheckPrereq
 386     - Implement Exec
 387
 388   """
 389   def __init__(self, lu):
 390     self.lu = lu
 391
 392     # Shortcuts
 393     self.cfg = lu.cfg
 394     self.rpc = lu.rpc
 395
 396   def CheckPrereq(self):
 397     """Check prerequisites for this tasklets.
 398
 399     This method should check whether the prerequisites for the execution of
 400     this tasklet are fulfilled. It can do internode communication, but it
 401     should be idempotent - no cluster or system changes are allowed.
 402
 403     The method should raise errors.OpPrereqError in case something is not
 404     fulfilled. Its return value is ignored.
 405
 406     This method should also update all parameters to their canonical form if it
 407     hasn't been done before.
 408
 409     """
 410     raise NotImplementedError
 411
 412   def Exec(self, feedback_fn):
 413     """Execute the tasklet.
 414
 415     This method should implement the actual work. It should raise
 416     errors.OpExecError for failures that are somewhat dealt with in code, or
 417     expected.
 418
 419     """
 420     raise NotImplementedError
 421
 422
 423 def _GetWantedNodes(lu, nodes):
 424   """Returns list of checked and expanded node names.
 425
 426   @type lu: L{LogicalUnit}
 427   @param lu: the logical unit on whose behalf we execute
 428   @type nodes: list
 429   @param nodes: list of node names or None for all nodes
 430   @rtype: list
 431   @return: the list of nodes, sorted
 432   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 433
 434   """
 435   if not isinstance(nodes, list):
 436     raise errors.OpPrereqError("Invalid argument type 'nodes'",
 437                                errors.ECODE_INVAL)
 438
 439   if not nodes:
 440     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 441       " non-empty list of nodes whose name is to be expanded.")
 442
 443   wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
 444   return utils.NiceSort(wanted)
 445
 446
 447 def _GetWantedInstances(lu, instances):
 448   """Returns list of checked and expanded instance names.
 449
 450   @type lu: L{LogicalUnit}
 451   @param lu: the logical unit on whose behalf we execute
 452   @type instances: list
 453   @param instances: list of instance names or None for all instances
 454   @rtype: list
 455   @return: the list of instances, sorted
 456   @raise errors.OpPrereqError: if the instances parameter is wrong type
 457   @raise errors.OpPrereqError: if any of the passed instances is not found
 458
 459   """
 460   if not isinstance(instances, list):
 461     raise errors.OpPrereqError("Invalid argument type 'instances'",
 462                                errors.ECODE_INVAL)
 463
 464   if instances:
 465     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 466   else:
 467     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 468   return wanted
 469
 470
 471 def _CheckOutputFields(static, dynamic, selected):
 472   """Checks whether all selected fields are valid.
 473
 474   @type static: L{utils.FieldSet}
 475   @param static: static fields set
 476   @type dynamic: L{utils.FieldSet}
 477   @param dynamic: dynamic fields set
 478
 479   """
 480   f = utils.FieldSet()
 481   f.Extend(static)
 482   f.Extend(dynamic)
 483
 484   delta = f.NonMatching(selected)
 485   if delta:
 486     raise errors.OpPrereqError("Unknown output fields selected: %s"
 487                                % ",".join(delta), errors.ECODE_INVAL)
 488
 489
 490 def _CheckBooleanOpField(op, name):
 491   """Validates boolean opcode parameters.
 492
 493   This will ensure that an opcode parameter is either a boolean value,
 494   or None (but that it always exists).
 495
 496   """
 497   val = getattr(op, name, None)
 498   if not (val is None or isinstance(val, bool)):
 499     raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
 500                                (name, str(val)), errors.ECODE_INVAL)
 501   setattr(op, name, val)
 502
 503
 504 def _CheckGlobalHvParams(params):
 505   """Validates that given hypervisor params are not global ones.
 506
 507   This will ensure that instances don't get customised versions of
 508   global params.
 509
 510   """
 511   used_globals = constants.HVC_GLOBALS.intersection(params)
 512   if used_globals:
 513     msg = ("The following hypervisor parameters are global and cannot"
 514            " be customized at instance level, please modify them at"
 515            " cluster level: %s" % utils.CommaJoin(used_globals))
 516     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 517
 518
 519 def _CheckNodeOnline(lu, node):
 520   """Ensure that a given node is online.
 521
 522   @param lu: the LU on behalf of which we make the check
 523   @param node: the node to check
 524   @raise errors.OpPrereqError: if the node is offline
 525
 526   """
 527   if lu.cfg.GetNodeInfo(node).offline:
 528     raise errors.OpPrereqError("Can't use offline node %s" % node,
 529                                errors.ECODE_INVAL)
 530
 531
 532 def _CheckNodeNotDrained(lu, node):
 533   """Ensure that a given node is not drained.
 534
 535   @param lu: the LU on behalf of which we make the check
 536   @param node: the node to check
 537   @raise errors.OpPrereqError: if the node is drained
 538
 539   """
 540   if lu.cfg.GetNodeInfo(node).drained:
 541     raise errors.OpPrereqError("Can't use drained node %s" % node,
 542                                errors.ECODE_INVAL)
 543
 544
 545 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 546   """Ensure that a node supports a given OS.
 547
 548   @param lu: the LU on behalf of which we make the check
 549   @param node: the node to check
 550   @param os_name: the OS to query about
 551   @param force_variant: whether to ignore variant errors
 552   @raise errors.OpPrereqError: if the node is not supporting the OS
 553
 554   """
 555   result = lu.rpc.call_os_get(node, os_name)
 556   result.Raise("OS '%s' not in supported OS list for node %s" %
 557                (os_name, node),
 558                prereq=True, ecode=errors.ECODE_INVAL)
 559   if not force_variant:
 560     _CheckOSVariant(result.payload, os_name)
 561
 562
 563 def _CheckDiskTemplate(template):
 564   """Ensure a given disk template is valid.
 565
 566   """
 567   if template not in constants.DISK_TEMPLATES:
 568     msg = ("Invalid disk template name '%s', valid templates are: %s" %
 569            (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
 570     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 571   if template == constants.DT_FILE and not constants.ENABLE_FILE_STORAGE:
 572     raise errors.OpPrereqError("File storage disabled at configure time",
 573                                errors.ECODE_INVAL)
 574
 575
 576 def _CheckInstanceDown(lu, instance, reason):
 577   """Ensure that an instance is not running."""
 578   if instance.admin_up:
 579     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 580                                (instance.name, reason), errors.ECODE_STATE)
 581
 582   pnode = instance.primary_node
 583   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 584   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 585               prereq=True, ecode=errors.ECODE_ENVIRON)
 586
 587   if instance.name in ins_l.payload:
 588     raise errors.OpPrereqError("Instance %s is running, %s" %
 589                                (instance.name, reason), errors.ECODE_STATE)
 590
 591
 592 def _ExpandItemName(fn, name, kind):
 593   """Expand an item name.
 594
 595   @param fn: the function to use for expansion
 596   @param name: requested item name
 597   @param kind: text description ('Node' or 'Instance')
 598   @return: the resolved (full) name
 599   @raise errors.OpPrereqError: if the item is not found
 600
 601   """
 602   full_name = fn(name)
 603   if full_name is None:
 604     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 605                                errors.ECODE_NOENT)
 606   return full_name
 607
 608
 609 def _ExpandNodeName(cfg, name):
 610   """Wrapper over L{_ExpandItemName} for nodes."""
 611   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 612
 613
 614 def _ExpandInstanceName(cfg, name):
 615   """Wrapper over L{_ExpandItemName} for instance."""
 616   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 617
 618
 619 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 620                           memory, vcpus, nics, disk_template, disks,
 621                           bep, hvp, hypervisor_name):
 622   """Builds instance related env variables for hooks
 623
 624   This builds the hook environment from individual variables.
 625
 626   @type name: string
 627   @param name: the name of the instance
 628   @type primary_node: string
 629   @param primary_node: the name of the instance's primary node
 630   @type secondary_nodes: list
 631   @param secondary_nodes: list of secondary nodes as strings
 632   @type os_type: string
 633   @param os_type: the name of the instance's OS
 634   @type status: boolean
 635   @param status: the should_run status of the instance
 636   @type memory: string
 637   @param memory: the memory size of the instance
 638   @type vcpus: string
 639   @param vcpus: the count of VCPUs the instance has
 640   @type nics: list
 641   @param nics: list of tuples (ip, mac, mode, link) representing
 642       the NICs the instance has
 643   @type disk_template: string
 644   @param disk_template: the disk template of the instance
 645   @type disks: list
 646   @param disks: the list of (size, mode) pairs
 647   @type bep: dict
 648   @param bep: the backend parameters for the instance
 649   @type hvp: dict
 650   @param hvp: the hypervisor parameters for the instance
 651   @type hypervisor_name: string
 652   @param hypervisor_name: the hypervisor for the instance
 653   @rtype: dict
 654   @return: the hook environment for this instance
 655
 656   """
 657   if status:
 658     str_status = "up"
 659   else:
 660     str_status = "down"
 661   env = {
 662     "OP_TARGET": name,
 663     "INSTANCE_NAME": name,
 664     "INSTANCE_PRIMARY": primary_node,
 665     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 666     "INSTANCE_OS_TYPE": os_type,
 667     "INSTANCE_STATUS": str_status,
 668     "INSTANCE_MEMORY": memory,
 669     "INSTANCE_VCPUS": vcpus,
 670     "INSTANCE_DISK_TEMPLATE": disk_template,
 671     "INSTANCE_HYPERVISOR": hypervisor_name,
 672   }
 673
 674   if nics:
 675     nic_count = len(nics)
 676     for idx, (ip, mac, mode, link) in enumerate(nics):
 677       if ip is None:
 678         ip = ""
 679       env["INSTANCE_NIC%d_IP" % idx] = ip
 680       env["INSTANCE_NIC%d_MAC" % idx] = mac
 681       env["INSTANCE_NIC%d_MODE" % idx] = mode
 682       env["INSTANCE_NIC%d_LINK" % idx] = link
 683       if mode == constants.NIC_MODE_BRIDGED:
 684         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 685   else:
 686     nic_count = 0
 687
 688   env["INSTANCE_NIC_COUNT"] = nic_count
 689
 690   if disks:
 691     disk_count = len(disks)
 692     for idx, (size, mode) in enumerate(disks):
 693       env["INSTANCE_DISK%d_SIZE" % idx] = size
 694       env["INSTANCE_DISK%d_MODE" % idx] = mode
 695   else:
 696     disk_count = 0
 697
 698   env["INSTANCE_DISK_COUNT"] = disk_count
 699
 700   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 701     for key, value in source.items():
 702       env["INSTANCE_%s_%s" % (kind, key)] = value
 703
 704   return env
 705
 706
 707 def _NICListToTuple(lu, nics):
 708   """Build a list of nic information tuples.
 709
 710   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 711   value in LUQueryInstanceData.
 712
 713   @type lu:  L{LogicalUnit}
 714   @param lu: the logical unit on whose behalf we execute
 715   @type nics: list of L{objects.NIC}
 716   @param nics: list of nics to convert to hooks tuples
 717
 718   """
 719   hooks_nics = []
 720   c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
 721   for nic in nics:
 722     ip = nic.ip
 723     mac = nic.mac
 724     filled_params = objects.FillDict(c_nicparams, nic.nicparams)
 725     mode = filled_params[constants.NIC_MODE]
 726     link = filled_params[constants.NIC_LINK]
 727     hooks_nics.append((ip, mac, mode, link))
 728   return hooks_nics
 729
 730
 731 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 732   """Builds instance related env variables for hooks from an object.
 733
 734   @type lu: L{LogicalUnit}
 735   @param lu: the logical unit on whose behalf we execute
 736   @type instance: L{objects.Instance}
 737   @param instance: the instance for which we should build the
 738       environment
 739   @type override: dict
 740   @param override: dictionary with key/values that will override
 741       our values
 742   @rtype: dict
 743   @return: the hook environment dictionary
 744
 745   """
 746   cluster = lu.cfg.GetClusterInfo()
 747   bep = cluster.FillBE(instance)
 748   hvp = cluster.FillHV(instance)
 749   args = {
 750     'name': instance.name,
 751     'primary_node': instance.primary_node,
 752     'secondary_nodes': instance.secondary_nodes,
 753     'os_type': instance.os,
 754     'status': instance.admin_up,
 755     'memory': bep[constants.BE_MEMORY],
 756     'vcpus': bep[constants.BE_VCPUS],
 757     'nics': _NICListToTuple(lu, instance.nics),
 758     'disk_template': instance.disk_template,
 759     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 760     'bep': bep,
 761     'hvp': hvp,
 762     'hypervisor_name': instance.hypervisor,
 763   }
 764   if override:
 765     args.update(override)
 766   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 767
 768
 769 def _AdjustCandidatePool(lu, exceptions):
 770   """Adjust the candidate pool after node operations.
 771
 772   """
 773   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 774   if mod_list:
 775     lu.LogInfo("Promoted nodes to master candidate role: %s",
 776                utils.CommaJoin(node.name for node in mod_list))
 777     for name in mod_list:
 778       lu.context.ReaddNode(name)
 779   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 780   if mc_now > mc_max:
 781     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 782                (mc_now, mc_max))
 783
 784
 785 def _DecideSelfPromotion(lu, exceptions=None):
 786   """Decide whether I should promote myself as a master candidate.
 787
 788   """
 789   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 790   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 791   # the new node will increase mc_max with one, so:
 792   mc_should = min(mc_should + 1, cp_size)
 793   return mc_now < mc_should
 794
 795
 796 def _CheckNicsBridgesExist(lu, target_nics, target_node,
 797                                profile=constants.PP_DEFAULT):
 798   """Check that the brigdes needed by a list of nics exist.
 799
 800   """
 801   c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
 802   paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
 803                 for nic in target_nics]
 804   brlist = [params[constants.NIC_LINK] for params in paramslist
 805             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 806   if brlist:
 807     result = lu.rpc.call_bridges_exist(target_node, brlist)
 808     result.Raise("Error checking bridges on destination node '%s'" %
 809                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 810
 811
 812 def _CheckInstanceBridgesExist(lu, instance, node=None):
 813   """Check that the brigdes needed by an instance exist.
 814
 815   """
 816   if node is None:
 817     node = instance.primary_node
 818   _CheckNicsBridgesExist(lu, instance.nics, node)
 819
 820
 821 def _CheckOSVariant(os_obj, name):
 822   """Check whether an OS name conforms to the os variants specification.
 823
 824   @type os_obj: L{objects.OS}
 825   @param os_obj: OS object to check
 826   @type name: string
 827   @param name: OS name passed by the user, to check for validity
 828
 829   """
 830   if not os_obj.supported_variants:
 831     return
 832   try:
 833     variant = name.split("+", 1)[1]
 834   except IndexError:
 835     raise errors.OpPrereqError("OS name must include a variant",
 836                                errors.ECODE_INVAL)
 837
 838   if variant not in os_obj.supported_variants:
 839     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
 840
 841
 842 def _GetNodeInstancesInner(cfg, fn):
 843   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
 844
 845
 846 def _GetNodeInstances(cfg, node_name):
 847   """Returns a list of all primary and secondary instances on a node.
 848
 849   """
 850
 851   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
 852
 853
 854 def _GetNodePrimaryInstances(cfg, node_name):
 855   """Returns primary instances on a node.
 856
 857   """
 858   return _GetNodeInstancesInner(cfg,
 859                                 lambda inst: node_name == inst.primary_node)
 860
 861
 862 def _GetNodeSecondaryInstances(cfg, node_name):
 863   """Returns secondary instances on a node.
 864
 865   """
 866   return _GetNodeInstancesInner(cfg,
 867                                 lambda inst: node_name in inst.secondary_nodes)
 868
 869
 870 def _GetStorageTypeArgs(cfg, storage_type):
 871   """Returns the arguments for a storage type.
 872
 873   """
 874   # Special case for file storage
 875   if storage_type == constants.ST_FILE:
 876     # storage.FileStorage wants a list of storage directories
 877     return [[cfg.GetFileStorageDir()]]
 878
 879   return []
 880
 881
 882 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
 883   faulty = []
 884
 885   for dev in instance.disks:
 886     cfg.SetDiskID(dev, node_name)
 887
 888   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
 889   result.Raise("Failed to get disk status from node %s" % node_name,
 890                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 891
 892   for idx, bdev_status in enumerate(result.payload):
 893     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
 894       faulty.append(idx)
 895
 896   return faulty
 897
 898
 899 def _FormatTimestamp(secs):
 900   """Formats a Unix timestamp with the local timezone.
 901
 902   """
 903   return time.strftime("%F %T %Z", time.gmtime(secs))
 904
 905
 906 class LUPostInitCluster(LogicalUnit):
 907   """Logical unit for running hooks after cluster initialization.
 908
 909   """
 910   HPATH = "cluster-init"
 911   HTYPE = constants.HTYPE_CLUSTER
 912   _OP_REQP = []
 913
 914   def BuildHooksEnv(self):
 915     """Build hooks env.
 916
 917     """
 918     env = {"OP_TARGET": self.cfg.GetClusterName()}
 919     mn = self.cfg.GetMasterNode()
 920     return env, [], [mn]
 921
 922   def CheckPrereq(self):
 923     """No prerequisites to check.
 924
 925     """
 926     return True
 927
 928   def Exec(self, feedback_fn):
 929     """Nothing to do.
 930
 931     """
 932     return True
 933
 934
 935 class LUDestroyCluster(LogicalUnit):
 936   """Logical unit for destroying the cluster.
 937
 938   """
 939   HPATH = "cluster-destroy"
 940   HTYPE = constants.HTYPE_CLUSTER
 941   _OP_REQP = []
 942
 943   def BuildHooksEnv(self):
 944     """Build hooks env.
 945
 946     """
 947     env = {"OP_TARGET": self.cfg.GetClusterName()}
 948     return env, [], []
 949
 950   def CheckPrereq(self):
 951     """Check prerequisites.
 952
 953     This checks whether the cluster is empty.
 954
 955     Any errors are signaled by raising errors.OpPrereqError.
 956
 957     """
 958     master = self.cfg.GetMasterNode()
 959
 960     nodelist = self.cfg.GetNodeList()
 961     if len(nodelist) != 1 or nodelist[0] != master:
 962       raise errors.OpPrereqError("There are still %d node(s) in"
 963                                  " this cluster." % (len(nodelist) - 1),
 964                                  errors.ECODE_INVAL)
 965     instancelist = self.cfg.GetInstanceList()
 966     if instancelist:
 967       raise errors.OpPrereqError("There are still %d instance(s) in"
 968                                  " this cluster." % len(instancelist),
 969                                  errors.ECODE_INVAL)
 970
 971   def Exec(self, feedback_fn):
 972     """Destroys the cluster.
 973
 974     """
 975     master = self.cfg.GetMasterNode()
 976     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
 977
 978     # Run post hooks on master node before it's removed
 979     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
 980     try:
 981       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
 982     except:
 983       # pylint: disable-msg=W0702
 984       self.LogWarning("Errors occurred running hooks on %s" % master)
 985
 986     result = self.rpc.call_node_stop_master(master, False)
 987     result.Raise("Could not disable the master role")
 988
 989     if modify_ssh_setup:
 990       priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
 991       utils.CreateBackup(priv_key)
 992       utils.CreateBackup(pub_key)
 993
 994     return master
 995
 996
 997 def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
 998                             warn_days=constants.SSL_CERT_EXPIRATION_WARN,
 999                             error_days=constants.SSL_CERT_EXPIRATION_ERROR):
1000   """Verifies certificate details for LUVerifyCluster.
1001
1002   """
1003   if expired:
1004     msg = "Certificate %s is expired" % filename
1005
1006     if not_before is not None and not_after is not None:
1007       msg += (" (valid from %s to %s)" %
1008               (_FormatTimestamp(not_before),
1009                _FormatTimestamp(not_after)))
1010     elif not_before is not None:
1011       msg += " (valid from %s)" % _FormatTimestamp(not_before)
1012     elif not_after is not None:
1013       msg += " (valid until %s)" % _FormatTimestamp(not_after)
1014
1015     return (LUVerifyCluster.ETYPE_ERROR, msg)
1016
1017   elif not_before is not None and not_before > now:
1018     return (LUVerifyCluster.ETYPE_WARNING,
1019             "Certificate %s not yet valid (valid from %s)" %
1020             (filename, _FormatTimestamp(not_before)))
1021
1022   elif not_after is not None:
1023     remaining_days = int((not_after - now) / (24 * 3600))
1024
1025     msg = ("Certificate %s expires in %d days" % (filename, remaining_days))
1026
1027     if remaining_days <= error_days:
1028       return (LUVerifyCluster.ETYPE_ERROR, msg)
1029
1030     if remaining_days <= warn_days:
1031       return (LUVerifyCluster.ETYPE_WARNING, msg)
1032
1033   return (None, None)
1034
1035
1036 def _VerifyCertificate(filename):
1037   """Verifies a certificate for LUVerifyCluster.
1038
1039   @type filename: string
1040   @param filename: Path to PEM file
1041
1042   """
1043   try:
1044     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1045                                            utils.ReadFile(filename))
1046   except Exception, err: # pylint: disable-msg=W0703
1047     return (LUVerifyCluster.ETYPE_ERROR,
1048             "Failed to load X509 certificate %s: %s" % (filename, err))
1049
1050   # Depending on the pyOpenSSL version, this can just return (None, None)
1051   (not_before, not_after) = utils.GetX509CertValidity(cert)
1052
1053   return _VerifyCertificateInner(filename, cert.has_expired(),
1054                                  not_before, not_after, time.time())
1055
1056
class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  # types of the items an error can refer to
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  # error codes, as (item type, error identifier) pairs
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  # keyword argument selecting the error type, and its possible values
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"
1096
1097   class NodeImage(object):
1098     """A class representing the logical and physical status of a node.
1099
1100     @ivar volumes: a structure as returned from
1101         L{ganeti.backend.GetVolumeList} (runtime)
1102     @ivar instances: a list of running instances (runtime)
1103     @ivar pinst: list of configured primary instances (config)
1104     @ivar sinst: list of configured secondary instances (config)
1105     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1106         of this node (config)
1107     @ivar mfree: free memory, as reported by hypervisor (runtime)
1108     @ivar dfree: free disk, as reported by the node (runtime)
1109     @ivar offline: the offline status (config)
1110     @type rpc_fail: boolean
1111     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1112         not whether the individual keys were correct) (runtime)
1113     @type lvm_fail: boolean
1114     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1115     @type hyp_fail: boolean
1116     @ivar hyp_fail: whether the RPC call didn't return the instance list
1117     @type ghost: boolean
1118     @ivar ghost: whether this is a known node or not (config)
1119
1120     """
1121     def __init__(self, offline=False):
1122       self.volumes = {}
1123       self.instances = []
1124       self.pinst = []
1125       self.sinst = []
1126       self.sbp = {}
1127       self.mfree = 0
1128       self.dfree = 0
1129       self.offline = offline
1130       self.rpc_fail = False
1131       self.lvm_fail = False
1132       self.hyp_fail = False
1133       self.ghost = False
1134
1135   def ExpandNames(self):
1136     self.needed_locks = {
1137       locking.LEVEL_NODE: locking.ALL_SET,
1138       locking.LEVEL_INSTANCE: locking.ALL_SET,
1139     }
1140     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1141
1142   def _Error(self, ecode, item, msg, *args, **kwargs):
1143     """Format an error message.
1144
1145     Based on the opcode's error_codes parameter, either format a
1146     parseable error code, or a simpler error string.
1147
1148     This must be called only from Exec and functions called from Exec.
1149
1150     """
1151     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1152     itype, etxt = ecode
1153     # first complete the msg
1154     if args:
1155       msg = msg % args
1156     # then format the whole message
1157     if self.op.error_codes:
1158       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1159     else:
1160       if item:
1161         item = " " + item
1162       else:
1163         item = ""
1164       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1165     # and finally report it via the feedback_fn
1166     self._feedback_fn("  - %s" % msg)
1167
1168   def _ErrorIf(self, cond, *args, **kwargs):
1169     """Log an error message if the passed condition is True.
1170
1171     """
1172     cond = bool(cond) or self.op.debug_simulate_errors
1173     if cond:
1174       self._Error(*args, **kwargs)
1175     # do not mark the operation as failed for WARN cases only
1176     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1177       self.bad = self.bad or cond
1178
1179   def _VerifyNode(self, ninfo, nresult):
1180     """Run multiple tests against a node.
1181
1182     Test list:
1183
1184       - compares ganeti version
1185       - checks vg existence and size > 20G
1186       - checks config file checksum
1187       - checks ssh to other nodes
1188
1189     @type ninfo: L{objects.Node}
1190     @param ninfo: the node to check
1191     @param nresult: the results from the node
1192     @rtype: boolean
1193     @return: whether overall this call was successful (and we can expect
1194          reasonable values in the respose)
1195
1196     """
1197     node = ninfo.name
1198     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1199
1200     # main result, nresult should be a non-empty dict
1201     test = not nresult or not isinstance(nresult, dict)
1202     _ErrorIf(test, self.ENODERPC, node,
1203                   "unable to verify node: no data returned")
1204     if test:
1205       return False
1206
1207     # compares ganeti version
1208     local_version = constants.PROTOCOL_VERSION
1209     remote_version = nresult.get("version", None)
1210     test = not (remote_version and
1211                 isinstance(remote_version, (list, tuple)) and
1212                 len(remote_version) == 2)
1213     _ErrorIf(test, self.ENODERPC, node,
1214              "connection to node returned invalid data")
1215     if test:
1216       return False
1217
1218     test = local_version != remote_version[0]
1219     _ErrorIf(test, self.ENODEVERSION, node,
1220              "incompatible protocol versions: master %s,"
1221              " node %s", local_version, remote_version[0])
1222     if test:
1223       return False
1224
1225     # node seems compatible, we can actually try to look into its results
1226
1227     # full package version
1228     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1229                   self.ENODEVERSION, node,
1230                   "software version mismatch: master %s, node %s",
1231                   constants.RELEASE_VERSION, remote_version[1],
1232                   code=self.ETYPE_WARNING)
1233
1234     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1235     if isinstance(hyp_result, dict):
1236       for hv_name, hv_result in hyp_result.iteritems():
1237         test = hv_result is not None
1238         _ErrorIf(test, self.ENODEHV, node,
1239                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1240
1241
1242     test = nresult.get(constants.NV_NODESETUP,
1243                            ["Missing NODESETUP results"])
1244     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1245              "; ".join(test))
1246
1247     return True
1248
1249   def _VerifyNodeTime(self, ninfo, nresult,
1250                       nvinfo_starttime, nvinfo_endtime):
1251     """Check the node time.
1252
1253     @type ninfo: L{objects.Node}
1254     @param ninfo: the node to check
1255     @param nresult: the remote results for the node
1256     @param nvinfo_starttime: the start time of the RPC call
1257     @param nvinfo_endtime: the end time of the RPC call
1258
1259     """
1260     node = ninfo.name
1261     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1262
1263     ntime = nresult.get(constants.NV_TIME, None)
1264     try:
1265       ntime_merged = utils.MergeTime(ntime)
1266     except (ValueError, TypeError):
1267       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1268       return
1269
1270     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1271       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1272     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1273       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1274     else:
1275       ntime_diff = None
1276
1277     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1278              "Node time diverges by at least %s from master node time",
1279              ntime_diff)
1280
1281   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1282     """Check the node time.
1283
1284     @type ninfo: L{objects.Node}
1285     @param ninfo: the node to check
1286     @param nresult: the remote results for the node
1287     @param vg_name: the configured VG name
1288
1289     """
1290     if vg_name is None:
1291       return
1292
1293     node = ninfo.name
1294     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1295
1296     # checks vg existence and size > 20G
1297     vglist = nresult.get(constants.NV_VGLIST, None)
1298     test = not vglist
1299     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1300     if not test:
1301       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1302                                             constants.MIN_VG_SIZE)
1303       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1304
1305     # check pv names
1306     pvlist = nresult.get(constants.NV_PVLIST, None)
1307     test = pvlist is None
1308     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1309     if not test:
1310       # check that ':' is not present in PV names, since it's a
1311       # special character for lvcreate (denotes the range of PEs to
1312       # use on the PV)
1313       for _, pvname, owner_vg in pvlist:
1314         test = ":" in pvname
1315         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1316                  " '%s' of VG '%s'", pvname, owner_vg)
1317
1318   def _VerifyNodeNetwork(self, ninfo, nresult):
1319     """Check the node time.
1320
1321     @type ninfo: L{objects.Node}
1322     @param ninfo: the node to check
1323     @param nresult: the remote results for the node
1324
1325     """
1326     node = ninfo.name
1327     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1328
1329     test = constants.NV_NODELIST not in nresult
1330     _ErrorIf(test, self.ENODESSH, node,
1331              "node hasn't returned node ssh connectivity data")
1332     if not test:
1333       if nresult[constants.NV_NODELIST]:
1334         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1335           _ErrorIf(True, self.ENODESSH, node,
1336                    "ssh communication with node '%s': %s", a_node, a_msg)
1337
1338     test = constants.NV_NODENETTEST not in nresult
1339     _ErrorIf(test, self.ENODENET, node,
1340              "node hasn't returned node tcp connectivity data")
1341     if not test:
1342       if nresult[constants.NV_NODENETTEST]:
1343         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1344         for anode in nlist:
1345           _ErrorIf(True, self.ENODENET, node,
1346                    "tcp communication with node '%s': %s",
1347                    anode, nresult[constants.NV_NODENETTEST][anode])
1348
1349   def _VerifyInstance(self, instance, instanceconfig, node_image):
1350     """Verify an instance.
1351
1352     This function checks to see if the required block devices are
1353     available on the instance's node.
1354
1355     """
1356     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1357     node_current = instanceconfig.primary_node
1358
1359     node_vol_should = {}
1360     instanceconfig.MapLVsByNode(node_vol_should)
1361
1362     for node in node_vol_should:
1363       n_img = node_image[node]
1364       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1365         # ignore missing volumes on offline or broken nodes
1366         continue
1367       for volume in node_vol_should[node]:
1368         test = volume not in n_img.volumes
1369         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1370                  "volume %s missing on node %s", volume, node)
1371
1372     if instanceconfig.admin_up:
1373       pri_img = node_image[node_current]
1374       test = instance not in pri_img.instances and not pri_img.offline
1375       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1376                "instance not running on its primary node %s",
1377                node_current)
1378
1379     for node, n_img in node_image.items():
1380       if (not node == node_current):
1381         test = instance in n_img.instances
1382         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1383                  "instance should not run on node %s", node)
1384
1385   def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1386     """Verify if there are any unknown volumes in the cluster.
1387
1388     The .os, .swap and backup volumes are ignored. All other volumes are
1389     reported as unknown.
1390
1391     """
1392     for node, n_img in node_image.items():
1393       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1394         # skip non-healthy nodes
1395         continue
1396       for volume in n_img.volumes:
1397         test = (node not in node_vol_should or
1398                 volume not in node_vol_should[node])
1399         self._ErrorIf(test, self.ENODEORPHANLV, node,
1400                       "volume %s is unknown", volume)
1401
1402   def _VerifyOrphanInstances(self, instancelist, node_image):
1403     """Verify the list of running instances.
1404
1405     This checks what instances are running but unknown to the cluster.
1406
1407     """
1408     for node, n_img in node_image.items():
1409       for o_inst in n_img.instances:
1410         test = o_inst not in instancelist
1411         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1412                       "instance %s on node %s should not exist", o_inst, node)
1413
1414   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1415     """Verify N+1 Memory Resilience.
1416
1417     Check that if one single node dies we can still start all the
1418     instances it was primary for.
1419
1420     """
1421     for node, n_img in node_image.items():
1422       # This code checks that every node which is now listed as
1423       # secondary has enough memory to host all instances it is
1424       # supposed to should a single other node in the cluster fail.
1425       # FIXME: not ready for failover to an arbitrary node
1426       # FIXME: does not support file-backed instances
1427       # WARNING: we currently take into account down instances as well
1428       # as up ones, considering that even if they're down someone
1429       # might want to start them even in the event of a node failure.
1430       for prinode, instances in n_img.sbp.items():
1431         needed_mem = 0
1432         for instance in instances:
1433           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1434           if bep[constants.BE_AUTO_BALANCE]:
1435             needed_mem += bep[constants.BE_MEMORY]
1436         test = n_img.mfree < needed_mem
1437         self._ErrorIf(test, self.ENODEN1, node,
1438                       "not enough memory on to accommodate"
1439                       " failovers should peer node %s fail", prinode)
1440
1441   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1442                        master_files):
1443     """Verifies and computes the node required file checksums.
1444
1445     @type ninfo: L{objects.Node}
1446     @param ninfo: the node to check
1447     @param nresult: the remote results for the node
1448     @param file_list: required list of files
1449     @param local_cksum: dictionary of local files and their checksums
1450     @param master_files: list of files that only masters should have
1451
1452     """
1453     node = ninfo.name
1454     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1455
1456     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1457     test = not isinstance(remote_cksum, dict)
1458     _ErrorIf(test, self.ENODEFILECHECK, node,
1459              "node hasn't returned file checksum data")
1460     if test:
1461       return
1462
1463     for file_name in file_list:
1464       node_is_mc = ninfo.master_candidate
1465       must_have = (file_name not in master_files) or node_is_mc
1466       # missing
1467       test1 = file_name not in remote_cksum
1468       # invalid checksum
1469       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1470       # existing and good
1471       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1472       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1473                "file '%s' missing", file_name)
1474       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1475                "file '%s' has wrong checksum", file_name)
1476       # not candidate and this is not a must-have file
1477       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1478                "file '%s' should not exist on non master"
1479                " candidates (and the file is outdated)", file_name)
1480       # all good, except non-master/non-must have combination
1481       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1482                "file '%s' should not exist"
1483                " on non master candidates", file_name)
1484
1485   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1486     """Verifies and the node DRBD status.
1487
1488     @type ninfo: L{objects.Node}
1489     @param ninfo: the node to check
1490     @param nresult: the remote results for the node
1491     @param instanceinfo: the dict of instances
1492     @param drbd_map: the DRBD map as returned by
1493         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1494
1495     """
1496     node = ninfo.name
1497     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1498
1499     # compute the DRBD minors
1500     node_drbd = {}
1501     for minor, instance in drbd_map[node].items():
1502       test = instance not in instanceinfo
1503       _ErrorIf(test, self.ECLUSTERCFG, None,
1504                "ghost instance '%s' in temporary DRBD map", instance)
1505         # ghost instance should not be running, but otherwise we
1506         # don't give double warnings (both ghost instance and
1507         # unallocated minor in use)
1508       if test:
1509         node_drbd[minor] = (instance, False)
1510       else:
1511         instance = instanceinfo[instance]
1512         node_drbd[minor] = (instance.name, instance.admin_up)
1513
1514     # and now check them
1515     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1516     test = not isinstance(used_minors, (tuple, list))
1517     _ErrorIf(test, self.ENODEDRBD, node,
1518              "cannot parse drbd status file: %s", str(used_minors))
1519     if test:
1520       # we cannot check drbd status
1521       return
1522
1523     for minor, (iname, must_exist) in node_drbd.items():
1524       test = minor not in used_minors and must_exist
1525       _ErrorIf(test, self.ENODEDRBD, node,
1526                "drbd minor %d of instance %s is not active", minor, iname)
1527     for minor in used_minors:
1528       test = minor not in node_drbd
1529       _ErrorIf(test, self.ENODEDRBD, node,
1530                "unallocated drbd minor %d is in use", minor)
1531
1532   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1533     """Verifies and updates the node volume data.
1534
1535     This function will update a L{NodeImage}'s internal structures
1536     with data from the remote call.
1537
1538     @type ninfo: L{objects.Node}
1539     @param ninfo: the node to check
1540     @param nresult: the remote results for the node
1541     @param nimg: the node image object
1542     @param vg_name: the configured VG name
1543
1544     """
1545     node = ninfo.name
1546     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1547
1548     nimg.lvm_fail = True
1549     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1550     if vg_name is None:
1551       pass
1552     elif isinstance(lvdata, basestring):
1553       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1554                utils.SafeEncode(lvdata))
1555     elif not isinstance(lvdata, dict):
1556       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1557     else:
1558       nimg.volumes = lvdata
1559       nimg.lvm_fail = False
1560
1561   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1562     """Verifies and updates the node instance list.
1563
1564     If the listing was successful, then updates this node's instance
1565     list. Otherwise, it marks the RPC call as failed for the instance
1566     list key.
1567
1568     @type ninfo: L{objects.Node}
1569     @param ninfo: the node to check
1570     @param nresult: the remote results for the node
1571     @param nimg: the node image object
1572
1573     """
1574     idata = nresult.get(constants.NV_INSTANCELIST, None)
1575     test = not isinstance(idata, list)
1576     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1577                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1578     if test:
1579       nimg.hyp_fail = True
1580     else:
1581       nimg.instances = idata
1582
1583   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1584     """Verifies and computes a node information map
1585
1586     @type ninfo: L{objects.Node}
1587     @param ninfo: the node to check
1588     @param nresult: the remote results for the node
1589     @param nimg: the node image object
1590     @param vg_name: the configured VG name
1591
1592     """
1593     node = ninfo.name
1594     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1595
1596     # try to read free memory (from the hypervisor)
1597     hv_info = nresult.get(constants.NV_HVINFO, None)
1598     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1599     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1600     if not test:
1601       try:
1602         nimg.mfree = int(hv_info["memory_free"])
1603       except (ValueError, TypeError):
1604         _ErrorIf(True, self.ENODERPC, node,
1605                  "node returned invalid nodeinfo, check hypervisor")
1606
1607     # FIXME: devise a free space model for file based instances as well
1608     if vg_name is not None:
1609       test = (constants.NV_VGLIST not in nresult or
1610               vg_name not in nresult[constants.NV_VGLIST])
1611       _ErrorIf(test, self.ENODELVM, node,
1612                "node didn't return data for the volume group '%s'"
1613                " - it is either missing or broken", vg_name)
1614       if not test:
1615         try:
1616           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1617         except (ValueError, TypeError):
1618           _ErrorIf(True, self.ENODERPC, node,
1619                    "node returned invalid LVM info, check LVM status")
1620
1621   def CheckPrereq(self):
1622     """Check prerequisites.
1623
1624     Transform the list of checks we're going to skip into a set and check that
1625     all its members are valid.
1626
1627     """
1628     self.skip_set = frozenset(self.op.skip_checks)
1629     if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1630       raise errors.OpPrereqError("Invalid checks to be skipped specified",
1631                                  errors.ECODE_INVAL)
1632
1633   def BuildHooksEnv(self):
1634     """Build hooks env.
1635
1636     Cluster-Verify hooks just ran in the post phase and their failure makes
1637     the output be logged in the verify output and the verification to fail.
1638
1639     """
1640     all_nodes = self.cfg.GetNodeList()
1641     env = {
1642       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1643       }
1644     for node in self.cfg.GetAllNodesInfo().values():
1645       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1646
1647     return env, [], all_nodes
1648
1649   def Exec(self, feedback_fn):
1650     """Verify integrity of cluster, performing various test on nodes.
1651
1652     """
1653     self.bad = False
1654     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1655     verbose = self.op.verbose
1656     self._feedback_fn = feedback_fn
1657     feedback_fn("* Verifying global settings")
1658     for msg in self.cfg.VerifyConfig():
1659       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1660
1661     # Check the cluster certificates
1662     for cert_filename in constants.ALL_CERT_FILES:
1663       (errcode, msg) = _VerifyCertificate(cert_filename)
1664       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1665
1666     vg_name = self.cfg.GetVGName()
1667     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1668     nodelist = utils.NiceSort(self.cfg.GetNodeList())
1669     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1670     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1671     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1672                         for iname in instancelist)
1673     i_non_redundant = [] # Non redundant instances
1674     i_non_a_balanced = [] # Non auto-balanced instances
1675     n_offline = 0 # Count of offline nodes
1676     n_drained = 0 # Count of nodes being drained
1677     node_vol_should = {}
1678
1679     # FIXME: verify OS list
1680     # do local checksums
1681     master_files = [constants.CLUSTER_CONF_FILE]
1682
1683     file_names = ssconf.SimpleStore().GetFileList()
1684     file_names.extend(constants.ALL_CERT_FILES)
1685     file_names.extend(master_files)
1686
1687     local_checksums = utils.FingerprintFiles(file_names)
1688
1689     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1690     node_verify_param = {
1691       constants.NV_FILELIST: file_names,
1692       constants.NV_NODELIST: [node.name for node in nodeinfo
1693                               if not node.offline],
1694       constants.NV_HYPERVISOR: hypervisors,
1695       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1696                                   node.secondary_ip) for node in nodeinfo
1697                                  if not node.offline],
1698       constants.NV_INSTANCELIST: hypervisors,
1699       constants.NV_VERSION: None,
1700       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1701       constants.NV_NODESETUP: None,
1702       constants.NV_TIME: None,
1703       }
1704
1705     if vg_name is not None:
1706       node_verify_param[constants.NV_VGLIST] = None
1707       node_verify_param[constants.NV_LVLIST] = vg_name
1708       node_verify_param[constants.NV_PVLIST] = [vg_name]
1709       node_verify_param[constants.NV_DRBDLIST] = None
1710
1711     # Build our expected cluster state
1712     node_image = dict((node.name, self.NodeImage(offline=node.offline))
1713                       for node in nodeinfo)
1714
1715     for instance in instancelist:
1716       inst_config = instanceinfo[instance]
1717
1718       for nname in inst_config.all_nodes:
1719         if nname not in node_image:
1720           # ghost node
1721           gnode = self.NodeImage()
1722           gnode.ghost = True
1723           node_image[nname] = gnode
1724
1725       inst_config.MapLVsByNode(node_vol_should)
1726
1727       pnode = inst_config.primary_node
1728       node_image[pnode].pinst.append(instance)
1729
1730       for snode in inst_config.secondary_nodes:
1731         nimg = node_image[snode]
1732         nimg.sinst.append(instance)
1733         if pnode not in nimg.sbp:
1734           nimg.sbp[pnode] = []
1735         nimg.sbp[pnode].append(instance)
1736
1737     # At this point, we have the in-memory data structures complete,
1738     # except for the runtime information, which we'll gather next
1739
1740     # Due to the way our RPC system works, exact response times cannot be
1741     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1742     # time before and after executing the request, we can at least have a time
1743     # window.
1744     nvinfo_starttime = time.time()
1745     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1746                                            self.cfg.GetClusterName())
1747     nvinfo_endtime = time.time()
1748
1749     cluster = self.cfg.GetClusterInfo()
1750     master_node = self.cfg.GetMasterNode()
1751     all_drbd_map = self.cfg.ComputeDRBDMap()
1752
1753     feedback_fn("* Verifying node status")
1754     for node_i in nodeinfo:
1755       node = node_i.name
1756       nimg = node_image[node]
1757
1758       if node_i.offline:
1759         if verbose:
1760           feedback_fn("* Skipping offline node %s" % (node,))
1761         n_offline += 1
1762         continue
1763
1764       if node == master_node:
1765         ntype = "master"
1766       elif node_i.master_candidate:
1767         ntype = "master candidate"
1768       elif node_i.drained:
1769         ntype = "drained"
1770         n_drained += 1
1771       else:
1772         ntype = "regular"
1773       if verbose:
1774         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1775
1776       msg = all_nvinfo[node].fail_msg
1777       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1778       if msg:
1779         nimg.rpc_fail = True
1780         continue
1781
1782       nresult = all_nvinfo[node].payload
1783
1784       nimg.call_ok = self._VerifyNode(node_i, nresult)
1785       self._VerifyNodeNetwork(node_i, nresult)
1786       self._VerifyNodeLVM(node_i, nresult, vg_name)
1787       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1788                             master_files)
1789       self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1790       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1791
1792       self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1793       self._UpdateNodeInstances(node_i, nresult, nimg)
1794       self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1795
1796     feedback_fn("* Verifying instance status")
1797     for instance in instancelist:
1798       if verbose:
1799         feedback_fn("* Verifying instance %s" % instance)
1800       inst_config = instanceinfo[instance]
1801       self._VerifyInstance(instance, inst_config, node_image)
1802       inst_nodes_offline = []
1803
1804       pnode = inst_config.primary_node
1805       pnode_img = node_image[pnode]
1806       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1807                self.ENODERPC, pnode, "instance %s, connection to"
1808                " primary node failed", instance)
1809
1810       if pnode_img.offline:
1811         inst_nodes_offline.append(pnode)
1812
1813       # If the instance is non-redundant we cannot survive losing its primary
1814       # node, so we are not N+1 compliant. On the other hand we have no disk
1815       # templates with more than one secondary so that situation is not well
1816       # supported either.
1817       # FIXME: does not support file-backed instances
1818       if not inst_config.secondary_nodes:
1819         i_non_redundant.append(instance)
1820       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1821                instance, "instance has multiple secondary nodes: %s",
1822                utils.CommaJoin(inst_config.secondary_nodes),
1823                code=self.ETYPE_WARNING)
1824
1825       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1826         i_non_a_balanced.append(instance)
1827
1828       for snode in inst_config.secondary_nodes:
1829         s_img = node_image[snode]
1830         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1831                  "instance %s, connection to secondary node failed", instance)
1832
1833         if s_img.offline:
1834           inst_nodes_offline.append(snode)
1835
1836       # warn that the instance lives on offline nodes
1837       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1838                "instance lives on offline node(s) %s",
1839                utils.CommaJoin(inst_nodes_offline))
1840       # ... or ghost nodes
1841       for node in inst_config.all_nodes:
1842         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1843                  "instance lives on ghost node %s", node)
1844
1845     feedback_fn("* Verifying orphan volumes")
1846     self._VerifyOrphanVolumes(node_vol_should, node_image)
1847
1848     feedback_fn("* Verifying oprhan instances")
1849     self._VerifyOrphanInstances(instancelist, node_image)
1850
1851     if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1852       feedback_fn("* Verifying N+1 Memory redundancy")
1853       self._VerifyNPlusOneMemory(node_image, instanceinfo)
1854
1855     feedback_fn("* Other Notes")
1856     if i_non_redundant:
1857       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1858                   % len(i_non_redundant))
1859
1860     if i_non_a_balanced:
1861       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1862                   % len(i_non_a_balanced))
1863
1864     if n_offline:
1865       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
1866
1867     if n_drained:
1868       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
1869
1870     return not self.bad
1871
1872   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1873     """Analyze the post-hooks' result
1874
1875     This method analyses the hook result, handles it, and sends some
1876     nicely-formatted feedback back to the user.
1877
1878     @param phase: one of L{constants.HOOKS_PHASE_POST} or
1879         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1880     @param hooks_results: the results of the multi-node hooks rpc call
1881     @param feedback_fn: function used send feedback back to the caller
1882     @param lu_result: previous Exec result
1883     @return: the new Exec result, based on the previous result
1884         and hook results
1885
1886     """
1887     # We only really run POST phase hooks, and are only interested in
1888     # their results
1889     if phase == constants.HOOKS_PHASE_POST:
1890       # Used to change hooks' output to proper indentation
1891       indent_re = re.compile('^', re.M)
1892       feedback_fn("* Hooks Results")
1893       assert hooks_results, "invalid result from hooks"
1894
1895       for node_name in hooks_results:
1896         res = hooks_results[node_name]
1897         msg = res.fail_msg
1898         test = msg and not res.offline
1899         self._ErrorIf(test, self.ENODEHOOKS, node_name,
1900                       "Communication failure in hooks execution: %s", msg)
1901         if res.offline or msg:
1902           # No need to investigate payload if node is offline or gave an error.
1903           # override manually lu_result here as _ErrorIf only
1904           # overrides self.bad
1905           lu_result = 1
1906           continue
1907         for script, hkr, output in res.payload:
1908           test = hkr == constants.HKR_FAIL
1909           self._ErrorIf(test, self.ENODEHOOKS, node_name,
1910                         "Script %s failed, output:", script)
1911           if test:
1912             output = indent_re.sub('      ', output)
1913             feedback_fn("%s" % output)
1914             lu_result = 0
1915
1916       return lu_result
1917
1918
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node and instance locks, with every level marked as 1
    in C{share_locks}.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict((level, 1) for level in locking.LEVELS)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances which are marked up and use a network-mirrored disk
    template are looked at. Their logical volumes, as computed from the
    configuration, are compared with the volumes reported by the nodes.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    node_errors = {}
    need_activation = []
    missing_lvs = {}
    outcome = (node_errors, need_activation, missing_lvs)

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(iname)
                 for iname in self.cfg.GetInstanceList()]

    # map every expected (node, volume) pair to the instance owning it
    expected = {}
    for inst in instances:
      if inst.admin_up and inst.disk_template in constants.DTS_NET_MIRROR:
        lvs_by_node = {}
        inst.MapLVsByNode(lvs_by_node)
        for node, volumes in lvs_by_node.items():
          for vol in volumes:
            expected[(node, vol)] = inst

    if not expected:
      return outcome

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      answer = node_lvs[node]
      if answer.offline:
        continue
      err = answer.fail_msg
      if err:
        logging.warning("Error enumerating LVs on node %s: %s", node, err)
        node_errors[node] = err
        continue

      for lv_name, (_, _, lv_online) in answer.payload.items():
        # every volume that was found is dropped from the expected set
        owner = expected.pop((node, lv_name), None)
        if owner is None or lv_online:
          continue
        if owner.name not in need_activation:
          need_activation.append(owner.name)

    # whatever is still expected was not reported by any node; group these
    # (node, volume) pairs by instance name
    for pair, owner in expected.items():
      missing_lvs.setdefault(owner.name, []).append(pair)

    return outcome
2000
2001
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  For the selected instances (all of them if none are given), the size of
  each disk as reported by the instance's primary node is compared with the
  size recorded in the configuration; mismatches are corrected in the
  configuration.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance names and declare the needed locks.

    With an explicit instance list only those instances are locked and the
    node locks are filled in later by L{DeclareLocks}; with an empty list
    all nodes and instances are locked. Every lock level is marked as 1 in
    C{share_locks}.

    @raise errors.OpPrereqError: if the 'instances' argument is not a list

    """
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = [_ExpandInstanceName(self.cfg, name)
                           for name in self.op.instances]
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      # None means "all instances", resolved in CheckPrereq
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    """Compute the node locks from the primary nodes of the named instances.

    Nothing is done when all instances were requested, as all node locks
    were already declared in L{ExpandNames}.

    """
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: whether the size of any (recursive) first child was changed

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @param feedback_fn: function passed on to the configuration updates
    @rtype: list
    @return: list of (instance name, disk index, size) tuples, one for each
        correction that was written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    # group the disks by primary node, so that each node is queried once
    per_node_disks = {}
    for instance in self.wanted_instances:
      node_disks = per_node_disks.setdefault(instance.primary_node, [])
      for idx, disk in enumerate(instance.disks):
        node_disks.append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      # work on copies, as SetDiskID is applied to the disks for this node
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      # Use the payload of the successful call, like the other RPC users in
      # this module do.
      # NOTE(review): assumes .data holds the raw RPC answer rather than
      # the list of sizes - confirm against the RpcResult class
      sizes = result.payload
      if (not isinstance(sizes, (list, tuple)) or
          len(sizes) != len(dskl)):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, sizes):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # presumably bytes to mebibytes, the unit of disk.size - TODO confirm
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
2120
2121
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: tuple
    @return: the environment (current cluster name as C{OP_TARGET}, the
        requested one as C{NEW_NAME}), a list holding only the master node
        and the list of all nodes

    """
    hook_env = {}
    hook_env["OP_TARGET"] = self.cfg.GetClusterName()
    hook_env["NEW_NAME"] = self.op.name
    master = self.cfg.GetMasterNode()
    return (hook_env, [master], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved; the rename is refused if neither the resolved
    name nor its IP address differ from the current ones, or if a changed
    IP address already answers on the node daemon port. On success the
    resolved IP is kept in C{self.ip} and C{self.op.name} is replaced by
    the resolved name.

    @raise errors.OpPrereqError: if the rename must not go ahead

    """
    resolved = utils.GetHostInfo(self.op.name)

    target_name = resolved.name
    target_ip = resolved.ip
    self.ip = target_ip

    name_changed = target_name != self.cfg.GetClusterName()
    ip_changed = target_ip != self.cfg.GetMasterIP()

    if not (name_changed or ip_changed):
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    if ip_changed and utils.TcpPing(target_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 target_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = target_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped on the master node, the configuration is
    updated, the known hosts file is rewritten and uploaded to the other
    nodes, and finally the master role is started again, even if one of
    the previous steps failed.

    @param feedback_fn: function passed on to the configuration update

    """
    new_cluster_name = self.op.name
    new_master_ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_cluster_name
      cluster.master_ip = new_master_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      # the master wrote the file itself, only the other nodes need a copy
      targets = self.cfg.GetNodeList()
      if master in targets:
        targets.remove(master)
      uploads = self.rpc.call_upload_file(targets,
                                          constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in uploads.iteritems():
        failure = to_result.fail_msg
        if not failure:
          continue
        # copy failures are only warned about, the rename goes on
        self.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                             (constants.SSH_KNOWN_HOSTS_FILE, to_node,
                              failure))

    finally:
      start_result = self.rpc.call_node_start_master(master, False, False)
      failure = start_result.fail_msg
      if failure:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", failure)
2204
2205
def _RecursiveCheckIfLVMBased(disk):
  """Tell whether a disk, or any disk below it, is a logical volume.

  The children are searched first, depth-first; the disk's own type is
  only looked at if none of them matched.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  for child in (disk.children or ()):
    if _RecursiveCheckIfLVMBased(child):
      return True
  return disk.dev_type == constants.LD_LV
2220
2221
2222 class LUSetClusterParams(LogicalUnit):
2223   """Change the parameters of the cluster.
2224
2225   """
2226   HPATH = "cluster-modify"
2227   HTYPE = constants.HTYPE_CLUSTER
2228   _OP_REQP = []
2229   REQ_BGL = False
2230
2231   def CheckArguments(self):
2232     """Check parameters
2233
2234     """
2235     if not hasattr(self.op, "candidate_pool_size"):
2236       self.op.candidate_pool_size = None
2237     if self.op.candidate_pool_size is not None:
2238       try:
2239         self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2240       except (ValueError, TypeError), err:
2241         raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2242                                    str(err), errors.ECODE_INVAL)
2243       if self.op.candidate_pool_size < 1:
2244         raise errors.OpPrereqError("At least one master candidate needed",
2245                                    errors.ECODE_INVAL)
2246     _CheckBooleanOpField(self.op, "maintain_node_health")
2247
2248   def ExpandNames(self):
2249     # FIXME: in the future maybe other cluster params won't require checking on
2250     # all nodes to be modified.
2251     self.needed_locks = {
2252       locking.LEVEL_NODE: locking.ALL_SET,
2253     }
2254     self.share_locks[locking.LEVEL_NODE] = 1
2255
2256   def BuildHooksEnv(self):
2257     """Build hooks env.
2258
2259     """
2260     env = {
2261       "OP_TARGET": self.cfg.GetClusterName(),
2262       "NEW_VG_NAME": self.op.vg_name,
2263       }
2264     mn = self.cfg.GetMasterNode()
2265     return env, [mn], [mn]
2266
2267   def CheckPrereq(self):
2268     """Check prerequisites.
2269
2270     This checks whether the given params don't conflict and
2271     if the given volume group is valid.
2272
2273     """
2274     if self.op.vg_name is not None and not self.op.vg_name:
2275       instances = self.cfg.GetAllInstancesInfo().values()
2276       for inst in instances:
2277         for disk in inst.disks:
2278           if _RecursiveCheckIfLVMBased(disk):
2279             raise errors.OpPrereqError("Cannot disable lvm storage while"
2280                                        " lvm-based instances exist",
2281                                        errors.ECODE_INVAL)
2282
2283     node_list = self.acquired_locks[locking.LEVEL_NODE]
2284
2285     # if vg_name not None, checks given volume group on all nodes
2286     if self.op.vg_name:
2287       vglist = self.rpc.call_vg_list(node_list)
2288       for node in node_list:
2289         msg = vglist[node].fail_msg
2290         if msg:
2291           # ignoring down node
2292           self.LogWarning("Error while gathering data on node %s"
2293                           " (ignoring node): %s", node, msg)
2294           continue
2295         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2296                                               self.op.vg_name,
2297                                               constants.MIN_VG_SIZE)
2298         if vgstatus:
2299           raise errors.OpPrereqError("Error on node '%s': %s" %
2300                                      (node, vgstatus), errors.ECODE_ENVIRON)
2301
2302     self.cluster = cluster = self.cfg.GetClusterInfo()
2303     # validate params changes
2304     if self.op.beparams:
2305       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2306       self.new_beparams = objects.FillDict(
2307         cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2308
2309     if self.op.nicparams:
2310       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2311       self.new_nicparams = objects.FillDict(
2312         cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2313       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2314       nic_errors = []
2315
2316       # check all instances for consistency
2317       for instance in self.cfg.GetAllInstancesInfo().values():
2318         for nic_idx, nic in enumerate(instance.nics):
2319           params_copy = copy.deepcopy(nic.nicparams)
2320           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2321
2322           # check parameter syntax
2323           try:
2324             objects.NIC.CheckParameterSyntax(params_filled)
2325           except errors.ConfigurationError, err:
2326             nic_errors.append("Instance %s, nic/%d: %s" %
2327                               (instance.name, nic_idx, err))
2328
2329           # if we're moving instances to routed, check that they have an ip
2330           target_mode = params_filled[constants.NIC_MODE]
2331           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2332             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2333                               (instance.name, nic_idx))
2334       if nic_errors:
2335         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2336                                    "\n".join(nic_errors))
2337
2338     # hypervisor list/parameters
2339     self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2340     if self.op.hvparams:
2341       if not isinstance(self.op.hvparams, dict):
2342         raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2343                                    errors.ECODE_INVAL)
2344       for hv_name, hv_dict in self.op.hvparams.items():
2345         if hv_name not in self.new_hvparams:
2346           self.new_hvparams[hv_name] = hv_dict
2347         else:
2348           self.new_hvparams[hv_name].update(hv_dict)
2349
2350     # os hypervisor parameters
2351     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2352     if self.op.os_hvp:
2353       if not isinstance(self.op.os_hvp, dict):
2354         raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2355                                    errors.ECODE_INVAL)
2356       for os_name, hvs in self.op.os_hvp.items():
2357         if not isinstance(hvs, dict):
2358           raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2359                                       " input"), errors.ECODE_INVAL)
2360         if os_name not in self.new_os_hvp:
2361           self.new_os_hvp[os_name] = hvs
2362         else:
2363           for hv_name, hv_dict in hvs.items():
2364             if hv_name not in self.new_os_hvp[os_name]:
2365               self.new_os_hvp[os_name][hv_name] = hv_dict
2366             else:
2367               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2368
2369     if self.op.enabled_hypervisors is not None:
2370       self.hv_list = self.op.enabled_hypervisors
2371       if not self.hv_list:
2372         raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2373                                    " least one member",
2374                                    errors.ECODE_INVAL)
2375       invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2376       if invalid_hvs:
2377         raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2378                                    " entries: %s" %
2379                                    utils.CommaJoin(invalid_hvs),
2380                                    errors.ECODE_INVAL)
2381     else:
2382       self.hv_list = cluster.enabled_hypervisors
2383
2384     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2385       # either the enabled list has changed, or the parameters have, validate
2386       for hv_name, hv_params in self.new_hvparams.items():
2387         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2388             (self.op.enabled_hypervisors and
2389              hv_name in self.op.enabled_hypervisors)):
2390           # either this is a new hypervisor, or its parameters have changed
2391           hv_class = hypervisor.GetHypervisor(hv_name)
2392           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2393           hv_class.CheckParameterSyntax(hv_params)
2394           _CheckHVParams(self, node_list, hv_name, hv_params)
2395
2396     if self.op.os_hvp:
2397       # no need to check any newly-enabled hypervisors, since the
2398       # defaults have already been checked in the above code-block
2399       for os_name, os_hvp in self.new_os_hvp.items():
2400         for hv_name, hv_params in os_hvp.items():
2401           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2402           # we need to fill in the new os_hvp on top of the actual hv_p
2403           cluster_defaults = self.new_hvparams.get(hv_name, {})
2404           new_osp = objects.FillDict(cluster_defaults, hv_params)
2405           hv_class = hypervisor.GetHypervisor(hv_name)
2406           hv_class.CheckParameterSyntax(new_osp)
2407           _CheckHVParams(self, node_list, hv_name, new_osp)
2408
2409
2410   def Exec(self, feedback_fn):
2411     """Change the parameters of the cluster.
2412
2413     """
2414     if self.op.vg_name is not None:
2415       new_volume = self.op.vg_name
2416       if not new_volume:
2417         new_volume = None
2418       if new_volume != self.cfg.GetVGName():
2419         self.cfg.SetVGName(new_volume)
2420       else:
2421         feedback_fn("Cluster LVM configuration already in desired"
2422                     " state, not changing")
2423     if self.op.hvparams:
2424       self.cluster.hvparams = self.new_hvparams
2425     if self.op.os_hvp:
2426       self.cluster.os_hvp = self.new_os_hvp
2427     if self.op.enabled_hypervisors is not None:
2428       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2429     if self.op.beparams:
2430       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2431     if self.op.nicparams:
2432       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2433
2434     if self.op.candidate_pool_size is not None:
2435       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2436       # we need to update the pool size here, otherwise the save will fail
2437       _AdjustCandidatePool(self, [])
2438
2439     if self.op.maintain_node_health is not None:
2440       self.cluster.maintain_node_health = self.op.maintain_node_health
2441
2442     self.cfg.Update(self.cluster, feedback_fn)
2443
2444
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Upload the ancillary cluster files to the other nodes.

  ConfigWriter already distributes the config and ssconf files; this
  function copies a fixed set of further files, plus the ancillary
  files reported by each enabled hypervisor, to all online nodes
  except the master. Files that do not exist locally are skipped and
  per-node upload failures are only logged as warnings.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  cfg = lu.cfg

  # 1. Target nodes: online nodes, plus the extra ones, minus the master
  master = cfg.GetNodeInfo(cfg.GetMasterNode())
  targets = cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    targets.extend(additional_nodes)
  if master.name in targets:
    targets.remove(master.name)

  # 2. Files to copy: the static list, then the per-hypervisor ones
  to_copy = set([constants.ETC_HOSTS,
                 constants.SSH_KNOWN_HOSTS_FILE,
                 constants.RAPI_CERT_FILE,
                 constants.RAPI_USERS_FILE,
                 constants.CONFD_HMAC_KEY,
                ])

  for hv_name in cfg.GetClusterInfo().enabled_hypervisors:
    to_copy.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # 3. Upload each file which exists on this node
  for fname in to_copy:
    if not os.path.exists(fname):
      continue
    upload = lu.rpc.call_upload_file(targets, fname)
    for to_node, to_result in upload.items():
      err = to_result.fail_msg
      if err:
        lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                           (fname, to_node, err))
2487
2488
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This LU takes no opcode parameters: it requests all node locks in
  shared mode, passes the cluster object to the configuration writer's
  Update method and then uploads the ancillary files.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks of all nodes, in shared mode.

    """
    self.share_locks[locking.LEVEL_NODE] = 1
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to verify for this LU.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: function used to send feedback messages

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
2515
2516
2517 def _WaitForSync(lu, instance, oneshot=False):
2518   """Sleep and poll for an instance's disk to sync.
2519
2520   """
2521   if not instance.disks:
2522     return True
2523
2524   if not oneshot:
2525     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2526
2527   node = instance.primary_node
2528
2529   for dev in instance.disks:
2530     lu.cfg.SetDiskID(dev, node)
2531
2532   # TODO: Convert to utils.Retry
2533
2534   retries = 0
2535   degr_retries = 10 # in seconds, as we sleep 1 second each time
2536   while True:
2537     max_time = 0
2538     done = True
2539     cumul_degraded = False
2540     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2541     msg = rstats.fail_msg
2542     if msg:
2543       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2544       retries += 1
2545       if retries >= 10:
2546         raise errors.RemoteError("Can't contact node %s for mirror data,"
2547                                  " aborting." % node)
2548       time.sleep(6)
2549       continue
2550     rstats = rstats.payload
2551     retries = 0
2552     for i, mstat in enumerate(rstats):
2553       if mstat is None:
2554         lu.LogWarning("Can't compute data for node %s/%s",
2555                            node, instance.disks[i].iv_name)
2556         continue
2557
2558       cumul_degraded = (cumul_degraded or
2559                         (mstat.is_degraded and mstat.sync_percent is None))
2560       if mstat.sync_percent is not None:
2561         done = False
2562         if mstat.estimated_time is not None:
2563           rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2564           max_time = mstat.estimated_time
2565         else:
2566           rem_time = "no time estimate"
2567         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2568                         (instance.disks[i].iv_name, mstat.sync_percent,
2569                          rem_time))
2570
2571     # if we're done but degraded, let's do a few small retries, to
2572     # make sure we see a stable and not transient situation; therefore
2573     # we force restart of the loop
2574     if (done or oneshot) and cumul_degraded and degr_retries > 0:
2575       logging.info("Degraded disks found, %d retries left", degr_retries)
2576       degr_retries -= 1
2577       time.sleep(1)
2578       continue
2579
2580     if done or oneshot:
2581       break
2582
2583     time.sleep(min(60, max_time))
2584
2585   if done:
2586     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2587   return not cumul_degraded
2588
2589
2590 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2591   """Check that mirrors are not degraded.
2592
2593   The ldisk parameter, if True, will change the test from the
2594   is_degraded attribute (which represents overall non-ok status for
2595   the device(s)) to the ldisk (representing the local storage status).
2596
2597   """
2598   lu.cfg.SetDiskID(dev, node)
2599
2600   result = True
2601
2602   if on_primary or dev.AssembleOnSecondary():
2603     rstats = lu.rpc.call_blockdev_find(node, dev)
2604     msg = rstats.fail_msg
2605     if msg:
2606       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2607       result = False
2608     elif not rstats.payload:
2609       lu.LogWarning("Can't find disk on node %s", node)
2610       result = False
2611     else:
2612       if ldisk:
2613         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2614       else:
2615         result = result and not rstats.payload.is_degraded
2616
2617   if dev.children:
2618     for child in dev.children:
2619       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2620
2621   return result
2622
2623
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    """Validate the opcode parameters; no locks are requested.

    @raise errors.OpPrereqError: if a list of OS names was passed, as
        selective queries are not supported

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to verify for this LU.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and RPC results as
        values; the payload of each result is a list of
        (name, path, status, diagnose, variants) entries

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and lists of (path, status, diagnose, variants)
        tuples as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", []),
                                     (/srv/..., False, "invalid api", [])],
                           "node2": [(/srv/..., True, "", [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    An OS is reported as valid only if, on every node which answered
    the RPC, it has at least one entry and the first entry has a true
    status. The variants of a valid OS are the ones present in the
    first entry on all those nodes; for an invalid OS they are None.

    @param feedback_fn: function used to send feedback messages (unused)
    @rtype: list
    @return: one row per OS, holding the values of the requested
        output fields, in the requested order
    @raise errors.ParameterError: for an unknown output field

    """
    valid_nodes = list(self.cfg.GetOnlineNodeList())
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        valid = True
        variants = None
        for osl in os_data.values():
          # force a real boolean: without it the "valid" field could end
          # up holding an empty list or the raw status value
          valid = bool(valid and osl and osl[0][1])
          if not valid:
            variants = None
            break
          if calc_variants:
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              # keep only the variants known on all nodes seen so far
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a (shallow) copy of the dict
          val = dict(os_data)
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
2737
2738
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The node which is being removed is dropped from the list of nodes
    on which the hooks run, as a failed node would then be impossible
    to remove. The same node list is returned for both hook phases.

    @return: tuple of (environment dict, pre-phase nodes, post-phase nodes)

    """
    target = self.op.node_name
    env = {}
    env["OP_TARGET"] = target
    env["NODE_NAME"] = target

    hook_nodes = self.cfg.GetNodeList()
    if target in hook_nodes:
      hook_nodes.remove(target)
    else:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", target)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it's not the master
     - it does not have primary or secondary instances

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    if node.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    # the node must not be used by any instance
    for instance_name in instance_list:
      if node.name in self.cfg.GetInstanceInfo(instance_name).all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.node = node
    self.op.node_name = node.name

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    The node is removed from the configuration first; afterwards the
    post hooks are run on it and it is asked to leave the cluster.
    Failures in these last two steps are only logged as warnings.

    @param feedback_fn: function used to send feedback messages

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    # read this before the configuration is changed below
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    leave_result = self.rpc.call_node_leave_cluster(node.name,
                                                    modify_ssh_setup)
    err = leave_result.fail_msg
    if err:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", err)
2825
2826
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  # fields which are read directly from the node object
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  # fields which are computed from the node_info RPC data
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def ExpandNames(self):
    """Validate the output fields and compute the needed locks.

    Node locks (in shared mode) are requested only if non-static
    fields were selected and the opcode asks for locking.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(self, self.op.names)

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  @staticmethod
  def _ComputeRole(node, master_node):
    """Return the one-letter role code of a node.

    @param node: the node object
    @param master_node: the name of the master node
    @return: "M" for the master, "C" for a master candidate, "D" for a
        drained node, "O" for an offline node and "R" otherwise

    """
    if node.name == master_node:
      return "M"
    if node.master_candidate:
      return "C"
    if node.drained:
      return "D"
    if node.offline:
      return "O"
    return "R"

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    @param feedback_fn: function used to send feedback messages (unused)
    @rtype: list
    @return: one row per node (nodes ordered via utils.NiceSort),
        holding the values of the requested output fields, in the
        requested order
    @raise errors.OpExecError: if, when running without locks, some of
        the requested nodes are no longer in the configuration
    @raise errors.ParameterError: for an unknown output field

    """
    node_db = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted == locking.ALL_SET:
      nodenames = node_db.keys()
    else:
      nodenames = self.wanted
      missing = set(nodenames).difference(node_db.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)

    nodenames = utils.NiceSort(nodenames)
    nodelist = [node_db[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      # output field name -> key in the RPC payload, for integer values
      int_fields = (
        ("mtotal", 'memory_total'),
        ("mnode", 'memory_dom0'),
        ("mfree", 'memory_free'),
        ("dtotal", 'vg_size'),
        ("dfree", 'vg_free'),
        ("ctotal", 'cpu_total'),
        ("cnodes", 'cpu_nodes'),
        ("csockets", 'cpu_sockets'),
        )
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        reply = node_data[name]
        if reply.fail_msg or not reply.payload:
          # no data available for this node
          live_data[name] = {}
          continue
        info = reply.payload
        entry = {"bootid": info.get('bootid', None)}
        for field, key in int_fields:
          entry[field] = utils.TryConvert(int, info.get(key, None))
        live_data[name] = entry
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      # the instance mappings are only computed when they are needed
      for inst in self.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      row = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          val = self._ComputeRole(node, master_node)
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
2986
2987
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    """Validate the output fields and request shared node locks.

    Either the given nodes or, if none were given, all nodes are locked.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.share_locks[locking.LEVEL_NODE] = 1
    self.needed_locks = {}
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This only records the list of nodes whose locks were acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  @staticmethod
  def _FindOwner(ilist, lv_by_node, node, lv_name):
    """Find the instance which owns a logical volume on a node.

    @param ilist: list of instance objects
    @param lv_by_node: dict mapping each instance to its MapLVsByNode data
    @param node: the node name
    @param lv_name: the name of the volume
    @return: the name of the first matching instance, or '-' if none

    """
    for inst in ilist:
      node_lvs = lv_by_node[inst]
      if node in node_lvs and lv_name in node_lvs[node]:
        return inst.name
    return '-'

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    Offline nodes are skipped silently; for nodes which failed the
    RPC call a warning is logged.

    @param feedback_fn: function used to send feedback messages (unused)
    @rtype: list
    @return: one row per volume, holding the requested output fields
        (all converted to strings), in the requested order
    @raise errors.ParameterError: for an unknown output field

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      err = nresult.fail_msg
      if err:
        self.LogWarning("Can't compute volume data on node %s: %s", node, err)
        continue

      # sort a copy of the payload by physical device
      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda entry: entry['dev'])

      for vol in node_vols:
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            val = self._FindOwner(ilist, lv_by_node, node, vol['name'])
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        output.append(row)

    return output
3072
3073
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def ExpandNames(self):
    """Validate the storage type and fields, and request shared node locks.

    @raise errors.OpPrereqError: for an unknown storage type

    """
    storage_type = self.op.storage_type

    if storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

    self.share_locks[locking.LEVEL_NODE] = 1
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This defaults the optional "name" opcode parameter to None and
    records the list of nodes whose locks were acquired.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes are skipped silently; for nodes which failed the
    RPC call a warning is logged.

    @param feedback_fn: function used to send feedback messages (unused)
    @rtype: list
    @return: one row per storage unit (ordered by node and then by
        unit name, both via utils.NiceSort), holding the requested
        output fields in the requested order
    @raise errors.ParameterError: for an unknown output field

    """
    # Always get name to sort by
    fields = self.op.output_fields[:]
    if constants.SF_NAME not in fields:
      fields.insert(0, constants.SF_NAME)

    # Never ask for node or type as it's only known to the LU
    local_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [name for name in fields if name not in local_only]

    # position of each requested field in the rows returned by the RPC
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      err = nresult.fail_msg
      if err:
        self.LogWarning("Can't get storage data from node %s: %s", node, err)
        continue

      # index the rows by unit name, to be able to sort them
      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(row[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
3169
3170
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the storage type.

    @raise errors.OpPrereqError: for an unknown storage type

    """
    # the expanded name must be stored back in the opcode, as it is
    # used for locking in ExpandNames and for the RPC call in Exec
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type
    if storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Request the lock of the target node.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the storage type supports modifications and that
    all the fields to be changed are modifiable for this type.

    @raise errors.OpPrereqError: if either of the checks fails

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Modifies the storage unit on the target node.

    The failure of the RPC call is reported through the Raise method
    of the RPC result.

    @param feedback_fn: function used to send feedback messages (unused)

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
3221
3222
3223 class LUAddNode(LogicalUnit):
3224   """Logical unit for adding node to the cluster.
3225
3226   """
3227   HPATH = "node-add"
3228   HTYPE = constants.HTYPE_NODE
3229   _OP_REQP = ["node_name"]
3230
3231   def CheckArguments(self):
3232     # validate/normalize the node name
3233     self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3234
3235   def BuildHooksEnv(self):
3236     """Build hooks env.
3237
3238     This will run on all nodes before, and on all nodes + the new node after.
3239
3240     """
3241     env = {
3242       "OP_TARGET": self.op.node_name,
3243       "NODE_NAME": self.op.node_name,
3244       "NODE_PIP": self.op.primary_ip,
3245       "NODE_SIP": self.op.secondary_ip,
3246       }
3247     nodes_0 = self.cfg.GetNodeList()
3248     nodes_1 = nodes_0 + [self.op.node_name, ]
3249     return env, nodes_0, nodes_1
3250
3251   def CheckPrereq(self):
3252     """Check prerequisites.
3253
3254     This checks:
3255      - the new node is not already in the config
3256      - it is resolvable
3257      - its parameters (single/dual homed) matches the cluster
3258
3259     Any errors are signaled by raising errors.OpPrereqError.
3260
3261     """
3262     node_name = self.op.node_name
3263     cfg = self.cfg
3264
3265     dns_data = utils.GetHostInfo(node_name)
3266
3267     node = dns_data.name
3268     primary_ip = self.op.primary_ip = dns_data.ip
3269     secondary_ip = getattr(self.op, "secondary_ip", None)
3270     if secondary_ip is None:
3271       secondary_ip = primary_ip
3272     if not utils.IsValidIP(secondary_ip):
3273       raise errors.OpPrereqError("Invalid secondary IP given",
3274                                  errors.ECODE_INVAL)
3275     self.op.secondary_ip = secondary_ip
3276
3277     node_list = cfg.GetNodeList()
3278     if not self.op.readd and node in node_list:
3279       raise errors.OpPrereqError("Node %s is already in the configuration" %
3280                                  node, errors.ECODE_EXISTS)
3281     elif self.op.readd and node not in node_list:
3282       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3283                                  errors.ECODE_NOENT)
3284
3285     for existing_node_name in node_list:
3286       existing_node = cfg.GetNodeInfo(existing_node_name)
3287
3288       if self.op.readd and node == existing_node_name:
3289         if (existing_node.primary_ip != primary_ip or
3290             existing_node.secondary_ip != secondary_ip):
3291           raise errors.OpPrereqError("Readded node doesn't have the same IP"
3292                                      " address configuration as before",
3293                                      errors.ECODE_INVAL)
3294         continue
3295
3296       if (existing_node.primary_ip == primary_ip or
3297           existing_node.secondary_ip == primary_ip or
3298           existing_node.primary_ip == secondary_ip or
3299           existing_node.secondary_ip == secondary_ip):
3300         raise errors.OpPrereqError("New node ip address(es) conflict with"
3301                                    " existing node %s" % existing_node.name,
3302                                    errors.ECODE_NOTUNIQUE)
3303
3304     # check that the type of the node (single versus dual homed) is the
3305     # same as for the master
3306     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3307     master_singlehomed = myself.secondary_ip == myself.primary_ip
3308     newbie_singlehomed = secondary_ip == primary_ip
3309     if master_singlehomed != newbie_singlehomed:
3310       if master_singlehomed:
3311         raise errors.OpPrereqError("The master has no private ip but the"
3312                                    " new node has one",
3313                                    errors.ECODE_INVAL)
3314       else:
3315         raise errors.OpPrereqError("The master has a private ip but the"
3316                                    " new node doesn't have one",
3317                                    errors.ECODE_INVAL)
3318
3319     # checks reachability
3320     if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3321       raise errors.OpPrereqError("Node not reachable by ping",
3322                                  errors.ECODE_ENVIRON)
3323
3324     if not newbie_singlehomed:
3325       # check reachability from my secondary ip to newbie's secondary ip
3326       if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3327                            source=myself.secondary_ip):
3328         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3329                                    " based ping to noded port",
3330                                    errors.ECODE_ENVIRON)
3331
3332     if self.op.readd:
3333       exceptions = [node]
3334     else:
3335       exceptions = []
3336
3337     self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3338
3339     if self.op.readd:
3340       self.new_node = self.cfg.GetNodeInfo(node)
3341       assert self.new_node is not None, "Can't retrieve locked node %s" % node
3342     else:
3343       self.new_node = objects.Node(name=node,
3344                                    primary_ip=primary_ip,
3345                                    secondary_ip=secondary_ip,
3346                                    master_candidate=self.master_candidate,
3347                                    offline=False, drained=False)
3348
3349   def Exec(self, feedback_fn):
3350     """Adds the new node to the cluster.
3351
3352     """
3353     new_node = self.new_node
3354     node = new_node.name
3355
3356     # for re-adds, reset the offline/drained/master-candidate flags;
3357     # we need to reset here, otherwise offline would prevent RPC calls
3358     # later in the procedure; this also means that if the re-add
3359     # fails, we are left with a non-offlined, broken node
3360     if self.op.readd:
3361       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3362       self.LogInfo("Readding a node, the offline/drained flags were reset")
3363       # if we demote the node, we do cleanup later in the procedure
3364       new_node.master_candidate = self.master_candidate
3365
3366     # notify the user about any possible mc promotion
3367     if new_node.master_candidate:
3368       self.LogInfo("Node will be a master candidate")
3369
3370     # check connectivity
3371     result = self.rpc.call_version([node])[node]
3372     result.Raise("Can't get version information from node %s" % node)
3373     if constants.PROTOCOL_VERSION == result.payload:
3374       logging.info("Communication to node %s fine, sw version %s match",
3375                    node, result.payload)
3376     else:
3377       raise errors.OpExecError("Version mismatch master version %s,"
3378                                " node version %s" %
3379                                (constants.PROTOCOL_VERSION, result.payload))
3380
3381     # setup ssh on node
3382     if self.cfg.GetClusterInfo().modify_ssh_setup:
3383       logging.info("Copy ssh key to node %s", node)
3384       priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3385       keyarray = []
3386       keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3387                   constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3388                   priv_key, pub_key]
3389
3390       for i in keyfiles:
3391         keyarray.append(utils.ReadFile(i))
3392
3393       result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3394                                       keyarray[2], keyarray[3], keyarray[4],
3395                                       keyarray[5])
3396       result.Raise("Cannot transfer ssh keys to the new node")
3397
3398     # Add node to our /etc/hosts, and add key to known_hosts
3399     if self.cfg.GetClusterInfo().modify_etc_hosts:
3400       utils.AddHostToEtcHosts(new_node.name)
3401
3402     if new_node.secondary_ip != new_node.primary_ip:
3403       result = self.rpc.call_node_has_ip_address(new_node.name,
3404                                                  new_node.secondary_ip)
3405       result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3406                    prereq=True, ecode=errors.ECODE_ENVIRON)
3407       if not result.payload:
3408         raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3409                                  " you gave (%s). Please fix and re-run this"
3410                                  " command." % new_node.secondary_ip)
3411
3412     node_verify_list = [self.cfg.GetMasterNode()]
3413     node_verify_param = {
3414       constants.NV_NODELIST: [node],
3415       # TODO: do a node-net-test as well?
3416     }
3417
3418     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3419                                        self.cfg.GetClusterName())
3420     for verifier in node_verify_list:
3421       result[verifier].Raise("Cannot communicate with node %s" % verifier)
3422       nl_payload = result[verifier].payload[constants.NV_NODELIST]
3423       if nl_payload:
3424         for failed in nl_payload:
3425           feedback_fn("ssh/hostname verification failed"
3426                       " (checking from %s): %s" %
3427                       (verifier, nl_payload[failed]))
3428         raise errors.OpExecError("ssh/hostname verification failed.")
3429
3430     if self.op.readd:
3431       _RedistributeAncillaryFiles(self)
3432       self.context.ReaddNode(new_node)
3433       # make sure we redistribute the config
3434       self.cfg.Update(new_node, feedback_fn)
3435       # and make sure the new node will not have old files around
3436       if not new_node.master_candidate:
3437         result = self.rpc.call_node_demote_from_mc(new_node.name)
3438         msg = result.fail_msg
3439         if msg:
3440           self.LogWarning("Node failed to demote itself from master"
3441                           " candidate status: %s" % msg)
3442     else:
3443       _RedistributeAncillaryFiles(self, additional_nodes=[node])
3444       self.context.AddNode(new_node, self.proc.GetECId())
3445
3446
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  The flags that can be changed are master_candidate, offline and
  drained.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Validates the requested flag changes.

    Besides checking the opcode fields, this precomputes a few helper
    attributes: whether the node is being offlined/drained or
    de-offlined/undrained, whether the operation might demote the node
    (might_demote) and whether all nodes have to be locked (lock_all).

    @raise errors.OpPrereqError: if no modification was requested or if
        more than one flag is being set to True

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    _CheckBooleanOpField(self.op, 'auto_promote')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    # with auto_promote, a possible demotion makes us lock all nodes
    # (see ExpandNames) so that Exec can adjust the candidate pool
    self.lock_all = self.op.auto_promote and self.might_demote

  def ExpandNames(self):
    """Declares the node locks needed.

    All nodes are locked if lock_all was computed in CheckArguments,
    otherwise only the node being modified.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the requested flag changes can be applied to the
    node: the flags of the master node can't be changed, a demotion
    without auto_promote must leave enough master candidates, and an
    offline or drained node can't be made a master candidate. It also
    decides whether a node being de-offlined/undrained gets promoted to
    master candidate.

    @raise errors.OpPrereqError: if any of the above checks fails

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",
                                   errors.ECODE_INVAL)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      # Only touch the opcode when we really promote: storing a False
      # result here would turn an unset (None) field into an explicit
      # demotion request, which Exec would then report and act upon
      # (demote RPC, node re-registration) although nothing changes
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Autopromoting node to master candidate")

  def _DemoteNode(self, node):
    """Asks a node to demote itself from master candidate status.

    A failure of the RPC call is only logged as a warning.

    @param node: the node object to be demoted

    """
    rrc = self.rpc.call_node_demote_from_mc(node.name)
    msg = rrc.fail_msg
    if msg:
      self.LogWarning("Node failed to demote itself: %s" % msg)

  def Exec(self, feedback_fn):
    """Modifies a node.

    @param feedback_fn: passed on to the configuration update
    @rtype: list
    @return: list of (parameter name, description of the new value)
        pairs, one for each change made to the node

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        # an offline node can be neither master candidate nor drained
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        self._DemoteNode(node)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        # a drained node can be neither master candidate nor offline
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          self._DemoteNode(node)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result
3608
3609
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and protects the master node.

    @raise errors.OpPrereqError: if the target is the master node and
        the force parameter was not set

    """
    target = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = target
    is_master = (target == self.cfg.GetMasterNode())
    if is_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    Powercycling is a last-resort option which must not block on other
    jobs, so no locks are acquired at all.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the powercycle RPC call
    @raise errors.OpExecError: presumably, via the RPC result's Raise,
        if the call fails (the exact exception type is not visible
        here)

    """
    hv_type = self.cfg.GetHypervisorType()
    powercycle_result = self.rpc.call_node_powercycle(self.op.node_name,
                                                      hv_type)
    powercycle_result.Raise("Failed to schedule the reboot")
    return powercycle_result.payload
3649
3650
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """This LU acquires no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    @rtype: dict
    @return: a dictionary holding version constants, architecture data
        and the cluster-level settings; hypervisor parameters are
        limited to the enabled hypervisors

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Keep only the per-OS parameters of enabled hypervisors
    os_hvp = {}
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = dict([(hv_name, hv_params)
                              for (hv_name, hv_params) in hv_dict.items()
                              if hv_name in enabled_hvs])

    # Same restriction for the cluster-level hypervisor parameters
    hvparams = {}
    for hv_name in enabled_hvs:
      hvparams[hv_name] = cluster.hvparams[hv_name]

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      # the first enabled hypervisor is reported as the default one
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      }
3709
3710
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    """Acquires no locks and checks the selected output fields.

    """
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def _GetFieldValue(self, field):
    """Computes the value of a single output field.

    @param field: the name of the requested field
    @return: the value of the field
    @raise errors.ParameterError: if the field name is not known

    """
    if field == "cluster_name":
      return self.cfg.GetClusterName()
    if field == "master_node":
      return self.cfg.GetMasterNode()
    if field == "drain_flag":
      # the flag is reported as the existence of the queue drain file
      return os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
    if field == "watcher_pause":
      return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Compute the values of the requested configuration fields.

    @rtype: list
    @return: one value for each entry of the opcode's output_fields,
        in the same order
    @raise errors.ParameterError: if an unknown field was requested

    """
    values = []
    for field in self.op.output_fields:
      values.append(self._GetFieldValue(field))
    return values
3752
3753
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fills in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and runs the
    online check on its primary node; it also defaults the optional
    ignore_size opcode field to False.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by _AssembleInstanceDisks
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (disks_ok, disks_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if disks_ok:
      return disks_info

    raise errors.OpExecError("Cannot activate block devices")
3794
3795
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes, in two passes: first on
  every node in secondary mode, then on the primary node in primary
  mode.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors during the first
      (secondary mode) pass won't result in an error return from the
      function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a (disks_ok, device_info) tuple; disks_ok is False if any
      non-ignored assemble call failed, and device_info is a list with
      one (host, instance_visible_name, node_visible_name) entry per
      instance disk, where the host is the primary node and the
      node-visible name is None if the primary assembly failed

  """
  iname = instance.name
  primary = instance.primary_node
  disks_ok = True
  device_info = []

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(primary):
      if ignore_size:
        # work on a copy, so that the original disk keeps its size
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      fail_msg = lu.rpc.call_blockdev_assemble(node, node_disk, iname,
                                               False).fail_msg
      if not fail_msg:
        continue
      lu.proc.LogWarning("Could not prepare block device %s on node %s"
                         " (is_primary=False, pass=1): %s",
                         inst_disk.iv_name, node, fail_msg)
      if not ignore_secondaries:
        disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(primary):
      if node == primary:
        if ignore_size:
          node_disk = node_disk.Copy()
          node_disk.UnsetSize()
        lu.cfg.SetDiskID(node_disk, node)
        result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
        fail_msg = result.fail_msg
        if fail_msg:
          lu.proc.LogWarning("Could not prepare block device %s on node %s"
                             " (is_primary=True, pass=2): %s",
                             inst_disk.iv_name, node, fail_msg)
          disks_ok = False
        else:
          dev_path = result.payload

    device_info.append((primary, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, primary)

  return (disks_ok, device_info)
3878
3879
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  If the disks cannot be assembled, they are shut down again and an
  error is raised.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks we start
  @param force: passed as ignore_secondaries to the disk assembly; if
      it is neither None nor true, a hint about '--force' is logged on
      failure
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (disks_ok, _) = _AssembleInstanceDisks(lu, instance,
                                         ignore_secondaries=force)
  if disks_ok:
    return

  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
3893
3894
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fills in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    The shutdown is delegated to _SafeShutdownInstanceDisks.

    """
    _SafeShutdownInstanceDisks(self, self.instance)
3927
3928
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  The disks are shut down via _ShutdownInstanceDisks, but only after
  _CheckInstanceDown has been run for the instance.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks we should shut down

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  # the result of the shutdown itself is not passed on to the caller
  _ShutdownInstanceDisks(lu, instance)
3938
3939
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are only
  logged and do not influence the result; errors on any other node
  always make the function return False.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks we shut down
  @type ignore_primary: boolean
  @param ignore_primary: whether to ignore errors on the primary node
  @rtype: boolean
  @return: True if no (non-ignored) shutdown call failed

  """
  success = True
  primary = instance.primary_node
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(primary):
      lu.cfg.SetDiskID(top_disk, node)
      fail_msg = lu.rpc.call_blockdev_shutdown(node, top_disk).fail_msg
      if not fail_msg:
        continue
      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, fail_msg)
      if not (ignore_primary and node == primary):
        success = False
  return success
3961
3962
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  The node is queried for its current memory statistics; if it reports
  less free memory than requested, or if the information cannot be
  obtained from the node, an OpPrereqError exception is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  node_result = lu.rpc.call_node_info([node], vg_name, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  free_mem = node_result.payload.get('memory_free', None)
  # anything but an integer means the node couldn't compute the value
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if free_mem < requested:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
3998
3999
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  All the given nodes are queried in a single call; if any of them
  reports less free disk than requested, or if the information cannot
  be obtained from a node, an OpPrereqError exception is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  hv_type = lu.cfg.GetHypervisorType()
  all_results = lu.rpc.call_node_info(nodenames, vg_name, hv_type)

  for node in nodenames:
    node_result = all_results[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = node_result.payload.get("vg_free", None)
    # anything but an integer means the node couldn't compute the value
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if vg_free < requested:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)
4034
4035
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  Besides the required opcode fields, the opcode may carry optional
  "hvparams" and "beparams" dictionaries; they are validated in
  CheckPrereq and handed to the instance start RPC in Exec.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names by delegating to _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @return: tuple of (environment dict, node list, node list); the same
        node list (master node followed by all instance nodes) is
        returned twice

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates the
    optional parameter overrides, checks that the primary node is online
    and that the instance bridges exist and, if the instance is not
    reported as running, that the primary node has enough free memory.

    @raise errors.OpPrereqError: if beparams or hvparams are given but are
        not dictionaries (the helper checks called here may presumably
        raise it as well)

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    self.beparams = getattr(self.op, "beparams", {})
    if self.beparams:
      if not isinstance(self.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.beparams), ),
                                   errors.ECODE_INVAL)
      # fill the beparams dict
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = self.beparams

    # extra hvparams
    self.hvparams = getattr(self.op, "hvparams", {})
    if self.hvparams:
      if not isinstance(self.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.hvparams), ),
                                   errors.ECODE_INVAL)

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
      # the checks run on cluster defaults, overlaid with the instance's
      # own values, overlaid with the values passed in the opcode
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      filled_hvp.update(self.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
      self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      needed_mem = bep[constants.BE_MEMORY]
      if self.beparams:
        # the beparams from the opcode are handed to the start RPC in
        # Exec, so a memory override given there must also be the value
        # used for the free memory check
        needed_mem = self.beparams.get(constants.BE_MEMORY, needed_mem)
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           needed_mem, instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked up in the configuration first, then its disks
    are started and the start RPC is sent to the primary node. If the
    start fails the disks are shut down again.

    @raise errors.OpExecError: if the start RPC reports a failure

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)
4136
4137
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  Soft and hard reboots are sent to the primary node as one reboot RPC;
  any other (i.e. full) reboot is done as an instance shutdown, a
  _ShutdownInstanceDisks/_StartInstanceDisks cycle and an instance start.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Read the optional shutdown timeout, falling back to the default.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    default_timeout)

  def ExpandNames(self):
    """Validate the reboot type, then call _ExpandAndLockInstance.

    @raise errors.ParameterError: if the reboot type is not one of the
        soft, hard or full constants

    """
    known_types = (constants.INSTANCE_REBOOT_SOFT,
                   constants.INSTANCE_REBOOT_HARD,
                   constants.INSTANCE_REBOOT_FULL)
    if self.op.reboot_type not in known_types:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  known_types)
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = dict(IGNORE_SECONDARIES=self.op.ignore_secondaries,
               REBOOT_TYPE=self.op.reboot_type,
               SHUTDOWN_TIMEOUT=self.shutdown_timeout)
    # values computed from the instance object win over the ones above
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that its primary
    node is online and that its bridges exist.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, inst.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, inst)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    On success the instance is marked up in the configuration.

    """
    inst = self.instance
    skip_secondaries = self.op.ignore_secondaries
    kind = self.op.reboot_type
    pnode = inst.primary_node

    full_reboot = kind not in (constants.INSTANCE_REBOOT_SOFT,
                               constants.INSTANCE_REBOOT_HARD)
    if full_reboot:
      stop_result = self.rpc.call_instance_shutdown(pnode, inst,
                                                    self.shutdown_timeout)
      stop_result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, inst)
      _StartInstanceDisks(self, inst, skip_secondaries)
      start_result = self.rpc.call_instance_start(pnode, inst, None, None)
      failure = start_result.fail_msg
      if failure:
        # do not leave the disks active if the instance did not come up
        _ShutdownInstanceDisks(self, inst)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % failure)
    else:
      for dev in inst.disks:
        self.cfg.SetDiskID(dev, pnode)
      reboot_result = self.rpc.call_instance_reboot(pnode, inst, kind,
                                                    self.shutdown_timeout)
      reboot_result.Raise("Could not reboot instance")

    self.cfg.MarkInstanceUp(inst.name)
4226
4227
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  The instance is marked down in the configuration before the shutdown
  RPC is sent; an RPC failure is only logged as a warning.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Read the optional timeout, falling back to the default.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.timeout = getattr(self.op, "timeout", default_timeout)

  def ExpandNames(self):
    """Expand names by delegating to _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["TIMEOUT"] = self.timeout
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its primary
    node is online.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    inst = self.instance
    pnode = inst.primary_node
    wait_time = self.timeout

    self.cfg.MarkInstanceDown(inst.name)
    shutdown_result = self.rpc.call_instance_shutdown(pnode, inst, wait_time)
    failure = shutdown_result.fail_msg
    if failure:
      self.proc.LogWarning("Could not shutdown instance: %s" % failure)

    # the disks are shut down whether or not the RPC above succeeded
    _ShutdownInstanceDisks(self, inst)
4283
4284
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  The opcode may carry an optional "os_type" (and "force_variant"); when
  an OS type is given the instance is switched to it before the OS
  create scripts are run.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names by delegating to _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    @raise errors.OpPrereqError: if the instance is diskless

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot reinstall")

    # normalise the optional opcode fields so Exec can rely on them
    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      primary = _ExpandNodeName(self.cfg, inst.primary_node)
      _CheckNodeHasOS(self, primary, self.op.os_type, self.op.force_variant)

    # only set once all the checks above have passed
    self.instance = inst

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    instance = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      instance.os = new_os
      self.cfg.Update(instance, feedback_fn)

    _StartInstanceDisks(self, instance, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # the opcode's debug level is forwarded to the backend
      os_result = self.rpc.call_instance_os_add(instance.primary_node,
                                                instance, True,
                                                self.op.debug_level)
      os_result.Raise("Could not install OS for instance %s on node %s" %
                      (instance.name, instance.primary_node))
    finally:
      # the disks were started only for the installation
      _ShutdownInstanceDisks(self, instance)
4354
4355
class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  The "disks" opcode field is a list of disk indices; an empty list
  selects all disks of the instance.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    @raise errors.OpPrereqError: if "disks" is not a list or contains
        anything else than non-negative integers

    """
    wanted = self.op.disks
    if not isinstance(wanted, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for entry in wanted:
      if isinstance(entry, int) and entry >= 0:
        continue
      raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                 str(entry), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Expand names by delegating to _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    @raise errors.OpPrereqError: if the instance is diskless or a passed
        disk index is beyond the instance's disks

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot recreate disks")

    if self.op.disks:
      for disk_idx in self.op.disks:
        if disk_idx >= len(inst.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" %
                                     disk_idx, errors.ECODE_INVAL)
    else:
      # nothing was selected, which means all disks
      self.op.disks = range(len(inst.disks))

    self.instance = inst

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    # every disk whose index has not been passed in is left alone
    to_skip = [pos for (pos, _) in enumerate(self.instance.disks)
               if pos not in self.op.disks]

    _CreateDisks(self, self.instance, to_skip=to_skip)
4427
4428
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  The opcode may carry an optional "ignore_ip" flag which disables the
  check that the new name's IP address does not answer already.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["INSTANCE_NEW_NAME"] = self.op.new_name
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    and that the new name is usable.

    @raise errors.OpPrereqError: if an instance with the new name already
        exists or (unless "ignore_ip" is set) the new IP answers a ping

    """
    full_name = _ExpandInstanceName(self.cfg, self.op.instance_name)
    self.op.instance_name = full_name
    inst = self.cfg.GetInstanceInfo(full_name)
    assert inst is not None
    _CheckNodeOnline(self, inst.primary_node)
    _CheckInstanceDown(self, inst, "cannot rename")
    self.instance = inst

    # new name verification
    host = utils.GetHostInfo(self.op.new_name)

    # from here on the name returned by the lookup is used
    new_name = host.name
    self.op.new_name = new_name
    if new_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    check_ip = not getattr(self.op, "ignore_ip", False)
    if check_ip and utils.TcpPing(host.ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                 (host.ip, new_name),
                                 errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The configuration and the instance lock are changed first; failures
    of the later steps do not undo the rename in the configuration.

    """
    before = self.instance
    previous_name = before.name
    target_name = self.op.new_name

    if before.disk_template == constants.DT_FILE:
      old_dir = os.path.dirname(before.disks[0].logical_id[1])

    self.cfg.RenameInstance(previous_name, target_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    lock_manager = self.context.glm
    lock_manager.remove(locking.LEVEL_INSTANCE, previous_name)
    lock_manager.add(locking.LEVEL_INSTANCE, target_name)

    # re-read the instance from the configuration after rename
    renamed = self.cfg.GetInstanceInfo(target_name)

    if renamed.disk_template == constants.DT_FILE:
      new_dir = os.path.dirname(renamed.disks[0].logical_id[1])
      dir_result = self.rpc.call_file_storage_dir_rename(renamed.primary_node,
                                                         old_dir, new_dir)
      dir_result.Raise("Could not rename on node %s directory '%s' to '%s'"
                       " (but the instance has been renamed in Ganeti)" %
                       (renamed.primary_node, old_dir, new_dir))

    _StartInstanceDisks(self, renamed, None)
    try:
      script_result = self.rpc.call_instance_run_rename(renamed.primary_node,
                                                        renamed,
                                                        previous_name,
                                                        self.op.debug_level)
      failure = script_result.fail_msg
      if failure:
        # a failing rename script is reported but does not fail the LU
        warning = ("Could not run OS rename script for instance %s on node %s"
                   " (but the instance has been renamed in Ganeti): %s" %
                   (renamed.name, renamed.primary_node, failure))
        self.proc.LogWarning(warning)
    finally:
      _ShutdownInstanceDisks(self, renamed)
4518
4519
class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  With "ignore_failures" set, shutdown and disk removal problems are only
  reported through the feedback function and the removal goes on.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Read the optional shutdown timeout, falling back to the default.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    default_timeout)

  def ExpandNames(self):
    """Declare the instance lock and a node lock list computed later.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks via _LockInstancesNodes at node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @return: tuple of (environment, [master node], instance nodes followed
        by the master node)

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    master_only = [self.cfg.GetMasterNode()]
    with_instance_nodes = list(self.instance.all_nodes) + master_only
    return hook_env, master_only, with_instance_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    @raise errors.OpExecError: if the shutdown or the disk removal fails
        and "ignore_failures" is not set

    """
    inst = self.instance
    logging.info("Shutting down instance %s on node %s",
                 inst.name, inst.primary_node)

    shutdown_result = self.rpc.call_instance_shutdown(inst.primary_node, inst,
                                                      self.shutdown_timeout)
    failure = shutdown_result.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    logging.info("Removing block devices for instance %s", inst.name)

    disks_removed = _RemoveDisks(self, inst)
    if not disks_removed:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Can't remove instance's disks")
      feedback_fn("Warning: can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", inst.name)

    self.cfg.RemoveInstance(inst.name)
    # the instance lock is recorded in remove_locks, as the instance is gone
    self.remove_locks[locking.LEVEL_INSTANCE] = inst.name
4598
4599
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  For every selected instance a row of values is produced, one value per
  requested output field. Fields matching _FIELDS_STATIC are computed from
  the configuration objects; the fields in _FIELDS_DYNAMIC ("oper_state",
  "oper_ram" and "status") use data fetched from the primary nodes via
  RPC.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  # fields whose value is read directly as an attribute of the instance
  # object (see Exec)
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  # the raw-string entries are patterns; their groups are used in Exec to
  # select the item kind ("disk"/"nic"), the attribute and the index
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")


  def ExpandNames(self):
    """Validate the requested fields and declare the needed locks.

    Locks are only requested when both a node query is needed and the
    opcode asks for locking; they are declared as shared.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    # presumably NonMatching returns the requested fields not covered by
    # the static set (verify against utils.FieldSet); a true value is
    # used in Exec as "live data from the nodes is needed"
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks via _LockInstancesNodes when locking.

    """
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing is checked here.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    @return: a list with one entry per instance, each entry being the list
        of values for the requested output fields, in the requested order
    @raise errors.OpExecError: if explicitly requested instances are not
        among the locked/configured ones any more

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    # only the primary nodes are asked about the instances
    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    # bad_nodes: nodes whose RPC result carries a failure message
    # off_nodes: nodes whose RPC result is flagged as offline
    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      # no node query: every instance gets an empty data dictionary
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      # parameters filled by the cluster object, computed once per instance
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        # the match object is only used by the pattern-based branch below
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          # None means "unknown", as the primary node could not be queried
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          # offline is checked first, as offline nodes are in both lists
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            # combine the live state with the configured (admin) state
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        # the following un-indexed NIC fields describe the first NIC only
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          # the link is only reported as a bridge for bridged NICs
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          # the third letter selects the disk: "a" is index 0, "b" index 1
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          # groups: [0] is "disk" or "nic", [1] the attribute and, for the
          # indexed patterns, [2] the index as a string
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              # one entry per NIC, None for the NICs which are not bridged
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                # a NIC index beyond the existing NICs gives None
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output
4881
4882
class LUFailoverInstance(LogicalUnit):
  """Fail an instance over to its first secondary node.

  The instance is shut down on its current primary node; its first
  secondary node is then recorded as the new primary and, if the
  instance is marked as up, it is started there.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Fill in the optional opcode arguments.

    The shutdown timeout is taken from the opcode when present and
    from C{constants.DEFAULT_SHUTDOWN_TIMEOUT} otherwise.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    default_timeout)

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declaration.

    """
    self._ExpandAndLockInstance()
    # the node list is filled in later, by DeclareLocks
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment and node lists.

    The pre-phase node list holds the master and the secondary nodes
    of the instance; the post-phase list additionally holds the old
    primary node.

    """
    inst = self.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "OLD_SECONDARY": new_primary,
      "NEW_PRIMARY": new_primary,
      "NEW_SECONDARY": old_primary,
      }
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]
    return env, pre_nodes, post_nodes

  def CheckPrereq(self):
    """Verify that the failover can be attempted.

    The instance must use a network mirrored disk template and have a
    secondary node; that node must be online, not drained, have the
    instance's bridges and, if the instance is marked as up, have
    enough free memory.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    be_params = self.cfg.GetClusterInfo().FillBE(inst)
    if inst.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondaries = inst.secondary_nodes
    if not secondaries:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    new_primary = secondaries[0]
    _CheckNodeOnline(self, new_primary)
    _CheckNodeNotDrained(self, new_primary)

    if not inst.admin_up:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # the instance will be started, so its memory must fit on the target
      _CheckNodeFreeMemory(self, new_primary,
                           "failing over instance %s" % inst.name,
                           be_params[constants.BE_MEMORY],
                           inst.hypervisor)

    # the instance's bridges must be present on the target node
    _CheckInstanceBridgesExist(self, inst, node=new_primary)

  def Exec(self, feedback_fn):
    """Carry out the failover.

    The instance is shut down on its present primary node, its disks
    are deactivated, the configuration is updated to make the first
    secondary node the primary and, if the instance is marked as up,
    the disks are activated and the instance started on that node.

    """
    inst = self.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    if not inst.admin_up:
      feedback_fn("* not checking disk consistency as instance is not running")
    else:
      feedback_fn("* checking disk consistency between source and target")
      for disk in inst.disks:
        # for drbd, these are drbd over lvm
        consistent = _CheckDiskConsistency(self, disk, new_primary, False)
        if consistent or self.op.ignore_consistency:
          continue
        raise errors.OpExecError("Disk %s is degraded on target node,"
                                 " aborting failover." % disk.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 inst.name, old_primary)

    shutdown_res = self.rpc.call_instance_shutdown(old_primary, inst,
                                                   self.shutdown_timeout)
    shutdown_err = shutdown_res.fail_msg
    if shutdown_err:
      if not self.op.ignore_consistency:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, old_primary, shutdown_err))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           inst.name, old_primary, old_primary,
                           shutdown_err)

    feedback_fn("* deactivating the instance's disks on source node")
    disks_down = _ShutdownInstanceDisks(self, inst, ignore_primary=True)
    if not disks_down:
      raise errors.OpExecError("Can't shut down the instance's disks.")

    # record the role switch and distribute the new configuration
    inst.primary_node = new_primary
    self.cfg.Update(inst, feedback_fn)

    # an instance marked as down stays down on its new primary
    if not inst.admin_up:
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s",
                 inst.name, new_primary)

    disks_ok, _ = _AssembleInstanceDisks(self, inst,
                                         ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    start_res = self.rpc.call_instance_start(new_primary, inst, None, None)
    start_err = start_res.fail_msg
    if start_err:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (inst.name, new_primary, start_err))
5035
5036
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  Unlike a failover, which shuts the instance down first, a migration
  is done without shutting the instance down.  The actual work is
  delegated to a L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and set up the migration tasklet.

    """
    self._ExpandAndLockInstance()

    # the node list is filled in later, by DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 self.op.live, self.op.cleanup)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment and node lists.

    The pre-phase node list holds the master and the secondary nodes
    of the instance; the post-phase list additionally holds the
    current primary node.

    """
    inst = self._migrater.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    env = _BuildInstanceHookEnvByObject(self, inst)
    env.update({
      "MIGRATE_LIVE": self.op.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": old_primary,
      "OLD_SECONDARY": new_primary,
      "NEW_PRIMARY": new_primary,
      "NEW_SECONDARY": old_primary,
      })

    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]
    return env, pre_nodes, post_nodes
5086
5087
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down, new disks are created on the target node,
  the data is copied over from the current primary node, the target
  node is recorded as the new primary, the old disks are removed and,
  if the instance is marked as up, it is started on the target node.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Fill in the optional opcode arguments.

    Neither C{shutdown_timeout} nor C{ignore_consistency} is listed in
    C{_OP_REQP}, so both are read with a default instead of being
    accessed directly on the opcode.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
    # Exec consults this flag when the shutdown RPC fails; a missing
    # attribute must mean "do not ignore" rather than an AttributeError
    self.ignore_consistency = getattr(self.op, "ignore_consistency", False)

  def ExpandNames(self):
    """Lock the instance and the (expanded) target node.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the instance's primary node to the node locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary node of the instance and the
    target node.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    The target node must differ from the current primary node, every
    disk must be a plain logical volume or file device, and the target
    node must be online, not drained, have the instance's bridges and,
    if the instance is marked as up, have enough free memory.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # only disks of these two device types are accepted for copying
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @raise errors.OpExecError: if the instance cannot be shut down (and
        this is not being ignored), if the data copy fails, or if the
        instance or its disks cannot be started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # use the defaulted flag from CheckArguments; the opcode itself is
      # not guaranteed to carry an ignore_consistency attribute
      if self.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # release the minors and propagate the failure even if the
        # removal itself failed
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over; stop at the first failure
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # the copy succeeded: record the target node as the new primary
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
5268
5269
class LUMigrateNode(LogicalUnit):
  """Migrate all primary instances away from a node.

  One L{TLMigrateInstance} tasklet is created for every instance that
  has the given node as its primary node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node name, declare locks and create the tasklets.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.needed_locks = {
      locking.LEVEL_NODE: [node_name],
      }
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # one migration tasklet (never in cleanup mode) per primary instance
    instance_names = []
    migrations = []
    for primary_inst in _GetNodePrimaryInstances(self.cfg, node_name):
      logging.debug("Migrating instance %s", primary_inst.name)
      instance_names.append(primary_inst.name)
      migrations.append(TLMigrateInstance(self, primary_inst.name,
                                          self.op.live, False))

    self.tasklets = migrations

    # every instance that is going to be migrated must be locked
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_names

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment and node lists.

    Both the pre- and the post-phase node list hold only the master
    node.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }
    master_only = [self.cfg.GetMasterNode()]
    return (env, master_only, master_only)
5320
5321
class TLMigrateInstance(Tasklet):
  """Tasklet migrating one DRBD8 instance to its first secondary node.

  Depending on the C{cleanup} flag, L{Exec} either performs the
  migration or tries to clean up after a previously failed one.

  """
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    @param lu: the logical unit on whose behalf this tasklet runs
    @param instance_name: the name of the instance to migrate
    @param live: passed on unchanged to the instance_migrate RPC call
    @param cleanup: if true, L{Exec} runs the cleanup procedure instead
        of a migration

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance uses the drbd8 disk template and has
    a secondary node, and that this node has enough free memory and
    the instance's bridges. Unless running in cleanup mode, it also
    checks that the target node is not drained and that the primary
    node reports the instance as migratable.

    On success the instance object is stored in C{self.instance}.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    # these two checks only apply to a real migration, not to a cleanup
    if not self.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call: the drbd_wait_sync call is
    repeated, with a two second pause between rounds, until every node
    reports that it is done. While waiting, the lowest percentage
    reported by any node is passed to the feedback function.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        # each node answers with a (done, percent) pair
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    @param node: the node on which the instance's disks are closed

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    # set the disk IDs for this node before issuing the close call
    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    The disconnect call is sent to both nodes; a failure on either of
    them raises an error.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    @param multimaster: whether the disks should be attached in
        dual-master (true) or single-master (false) mode

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    # the node where the instance does NOT run is the one to demote
    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    The target node is demoted to secondary and the disks are
    reconnected in single-master mode. A failure while doing so is
    only logged as a warning and not propagated.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    This sends the finalize_migration call with a false success flag
    to the target node; a failure of that call is only logged.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    If preparing the target node or the migration itself fails, the
    migration is aborted and the disk status reverted before the error
    is raised.

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    # also kept on the object, as _AbortMigration reads it from there
    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    # NOTE(review): fixed ten second pause before the migrate call; the
    # reason for it is not visible in this block
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    # NOTE(review): second fixed ten second pause, after a successful
    # migrate call; the reason is likewise not visible here
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # tell the target node that the migration succeeded
    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    # demote the old primary and go back to single-master mode
    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    This stores the feedback function, the source and target nodes and
    their secondary IP addresses on the object, and then runs either
    the cleanup or the migration procedure, depending on the
    C{cleanup} flag.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    # the helper methods pass this node name to secondary IP mapping to
    # the drbd-related rpc calls
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()
5651
5652
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Recursively create a block device tree on one node.

  The children of the device are always visited first. The device
  itself is created only when creation is forced, either by the caller
  or because the device (or one of its ancestors) reports
  CreateOnSecondary(); otherwise only the recursion takes place.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; it
      is switched to True as soon as a device whose
      CreateOnSecondary() returns a true value is found, and the
      resulting value is handed down to the children
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  must_create = force_create
  if device.CreateOnSecondary():
    must_create = True

  # a missing (None) or empty children list means nothing to recurse into
  for child in (device.children or ()):
    _CreateBlockDev(lu, node, instance, child, must_create,
                    info, force_open)

  if must_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5693
5694
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create exactly one block device on the given node.

  No recursion over the device's children is done here, so they must
  already exist. If the device has no physical ID yet, the payload of
  the creation RPC is stored as its physical ID.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  owner = instance.name
  creation = lu.rpc.call_blockdev_create(node, device, device.size,
                                         owner, force_open, info)
  failure_text = ("Can't create block device %s on node %s for instance %s" %
                  (device, node, owner))
  creation.Raise(failure_text)
  if device.physical_id is None:
    device.physical_id = creation.payload
5723
5724
def _GenerateUniqueNames(lu, exts):
  """Build one unique name for every requested suffix.

  For each entry of C{exts} a new ID is requested from the
  configuration (passing the value of C{lu.proc.GetECId()}) and the
  entry is appended to that ID.

  @param lu: the logical unit on whose behalf we execute
  @param exts: the suffixes to append to the generated IDs
  @rtype: list
  @return: the generated names, in the same order as C{exts}

  """
  return ["%s%s" % (lu.cfg.GenerateUniqueID(lu.proc.GetECId()), suffix)
          for suffix in exts]
5736
5737
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Build a drbd8 disk object together with its two LV children.

  A port and a shared secret are obtained from the configuration; the
  children are a data volume of the requested size and a metadata
  volume of fixed size 128, both in the cluster volume group.

  @param lu: the logical unit on whose behalf we execute
  @param primary: first node of the drbd logical ID
  @param secondary: second node of the drbd logical ID
  @param size: size of the drbd device and of its data volume
  @param names: sequence whose first two items name the data and the
      metadata volume respectively
  @param iv_name: the iv_name to set on the drbd device
  @param p_minor: minor stored in the logical ID after the port
  @param s_minor: minor stored in the logical ID after C{p_minor}
  @rtype: L{objects.Disk}
  @return: the drbd8 disk with its children attached

  """
  cfg = lu.cfg
  port = cfg.AllocatePort()
  vgname = cfg.GetVGName()
  shared_secret = cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgname, names[0]))
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(vgname, names[1]))

  drbd_id = (primary, secondary, port, p_minor, s_minor, shared_secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)
5757
5758
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Build the disk objects of an instance for a given disk template.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: one of the constants.DT_* disk templates
  @param instance_name: passed to the DRBD minor allocation
  @param primary_node: the primary node of the instance
  @param secondary_nodes: must be empty for the plain and file
      templates and hold exactly one node for drbd8
  @param disk_info: list of dicts, each read for its "size" and "mode"
      keys
  @param file_storage_dir: directory in which file-based disks live
  @param file_driver: first element of the file disks' logical ID
  @param base_index: offset added to a disk's position when computing
      its index (used in names and in the iv_name)
  @rtype: list
  @return: the generated L{objects.Disk} objects (empty for diskless)
  @raise errors.ProgrammerError: on an unknown template or a number of
      secondary nodes not matching the template

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    suffixes = [".disk%d" % (base_index + i) for i in range(disk_count)]
    names = _GenerateUniqueNames(lu, suffixes)
    return [objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                         logical_id=(vgname, names[idx]),
                         iv_name="disk/%d" % (idx + base_index),
                         mode=disk["mode"])
            for idx, disk in enumerate(disk_info)]

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk: one for the primary, one for the secondary
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    suffixes = [".disk%d" % (base_index + i) for i in range(disk_count)]
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, suffixes):
      names.extend([lv_prefix + "_data", lv_prefix + "_meta"])

    disks = []
    for idx, disk in enumerate(disk_info):
      # names and minors both hold two consecutive entries per disk
      first = idx * 2
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[first:first + 2],
                                      "disk/%d" % (idx + base_index),
                                      minors[first], minors[first + 1])
      drbd_dev.mode = disk["mode"]
      disks.append(drbd_dev)
    return disks

  if template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    disks = []
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disks.append(objects.Disk(dev_type=constants.LD_FILE,
                                size=disk["size"],
                                iv_name="disk/%d" % disk_index,
                                logical_id=(file_driver,
                                            "%s/disk%d" % (file_storage_dir,
                                                           disk_index)),
                                mode=disk["mode"]))
    return disks

  raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5823
5824
def _GetInstanceInfoText(instance):
  """Build the text that is attached to an instance's disks as metadata.

  @param instance: object whose C{name} attribute is used
  @return: the string "originstname+" followed by the instance name

  """
  info_text = "originstname+%s" % instance.name
  return info_text
5830
5831
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks of an instance.

  This abstracts away some work from AddInstance. For file-based
  instances the storage directory (the directory of the first disk's
  path) is created first. Every disk is then created on all involved
  nodes; creation (and opening) is forced only on the primary node.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of disk indices to skip
  @type target_node: string
  @param target_node: if passed, it is used as the primary node and is
      the only node on which disks are created
  @return: nothing; failures are reported by the exceptions raised
      from the RPC results' Raise() and from L{_CreateBlockDev}

  """
  metadata = _GetInstanceInfoText(instance)
  if target_node is not None:
    primary = target_node
    nodes = [primary]
  else:
    primary = instance.primary_node
    nodes = instance.all_nodes

  if instance.disk_template == constants.DT_FILE:
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    mkdir_result = lu.rpc.call_file_storage_dir_create(primary, storage_dir)

    mkdir_result.Raise("Failed to create directory '%s' on node %s" %
                       (storage_dir, primary))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if not to_skip or idx not in to_skip:
      logging.info("Creating volume %s for instance %s",
                   device.iv_name, instance.name)
      #HARDCODE
      for node in nodes:
        on_primary = node == primary
        _CreateBlockDev(lu, node, instance, device, on_primary, metadata,
                        on_primary)
5875
5876
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For file-based instances the storage directory (the directory of
  the first disk's path) is removed as well, on the target node if
  given, else on the primary node.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal (False if any device or the
      file storage directory could not be removed)

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        # best effort: remember the failure but keep removing the rest
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = target_node or instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the removal was actually attempted on, which is
      # not the primary node when target_node was given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
5924
5925
def _ComputeDiskSize(disk_template, disks):
  """Compute the space required in the volume group by a set of disks.

  @param disk_template: one of the constants.DT_* disk templates
  @param disks: sequence of dicts, each with a "size" key; it is
      iterated more than once
  @return: None for the diskless and file templates, the sum of the
      disk sizes for plain, and the same sum plus 128 per disk for
      drbd8
  @raise errors.ProgrammerError: if the template is not known here

  """
  plain_total = sum(d["size"] for d in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_total = sum(d["size"] + 128 for d in disks)

  # Required free disk space as a function of disk and swap space
  requirements = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
  }

  try:
    return requirements[disk_template]
  except KeyError:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)
5944
5945
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Validate hypervisor parameters on a set of nodes.

  The validation is delegated to the nodes through one RPC call;
  results of nodes marked offline are ignored, any other failed result
  aborts the check. It is shared by instance creation and instance
  modification.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid (raised
      through the Raise() method of the per-node RPC result)

  """
  per_node = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                    hvname,
                                                    hvparams)
  for name in nodenames:
    node_result = per_node[name]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        name)
5971
5972
5973 class LUCreateInstance(LogicalUnit):
5974   """Create an instance.
5975
5976   """
5977   HPATH = "instance-add"
5978   HTYPE = constants.HTYPE_INSTANCE
5979   _OP_REQP = ["instance_name", "disks",
5980               "mode", "start",
5981               "wait_for_sync", "ip_check", "nics",
5982               "hvparams", "beparams"]
5983   REQ_BGL = False
5984
5985   def CheckArguments(self):
5986     """Check arguments.
5987
5988     """
5989     # set optional parameters to none if they don't exist
5990     for attr in ["pnode", "snode", "iallocator", "hypervisor",
5991                  "disk_template"]:
5992       if not hasattr(self.op, attr):
5993         setattr(self.op, attr, None)
5994
5995     # do not require name_check to ease forward/backward compatibility
5996     # for tools
5997     if not hasattr(self.op, "name_check"):
5998       self.op.name_check = True
5999     if not hasattr(self.op, "no_install"):
6000       self.op.no_install = False
6001     if self.op.no_install and self.op.start:
6002       self.LogInfo("No-installation mode selected, disabling startup")
6003       self.op.start = False
6004     # validate/normalize the instance name
6005     self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6006     if self.op.ip_check and not self.op.name_check:
6007       # TODO: make the ip check more flexible and not depend on the name check
6008       raise errors.OpPrereqError("Cannot do ip checks without a name check",
6009                                  errors.ECODE_INVAL)
6010     # check disk information: either all adopt, or no adopt
6011     has_adopt = has_no_adopt = False
6012     for disk in self.op.disks:
6013       if "adopt" in disk:
6014         has_adopt = True
6015       else:
6016         has_no_adopt = True
6017     if has_adopt and has_no_adopt:
6018       raise errors.OpPrereqError("Either all disks are adopted or none is",
6019                                  errors.ECODE_INVAL)
6020     if has_adopt:
6021       if self.op.disk_template != constants.DT_PLAIN:
6022         raise errors.OpPrereqError("Disk adoption is only supported for the"
6023                                    " 'plain' disk template",
6024                                    errors.ECODE_INVAL)
6025       if self.op.iallocator is not None:
6026         raise errors.OpPrereqError("Disk adoption not allowed with an"
6027                                    " iallocator script", errors.ECODE_INVAL)
6028       if self.op.mode == constants.INSTANCE_IMPORT:
6029         raise errors.OpPrereqError("Disk adoption not allowed for"
6030                                    " instance import", errors.ECODE_INVAL)
6031
6032     self.adopt_disks = has_adopt
6033
6034     # verify creation mode
6035     if self.op.mode not in (constants.INSTANCE_CREATE,
6036                             constants.INSTANCE_IMPORT):
6037       raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6038                                  self.op.mode, errors.ECODE_INVAL)
6039
6040     # instance name verification
6041     if self.op.name_check:
6042       self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6043       self.op.instance_name = self.hostname1.name
6044       # used in CheckPrereq for ip ping check
6045       self.check_ip = self.hostname1.ip
6046     else:
6047       self.check_ip = None
6048
6049     # file storage checks
6050     if (self.op.file_driver and
6051         not self.op.file_driver in constants.FILE_DRIVER):
6052       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6053                                  self.op.file_driver, errors.ECODE_INVAL)
6054
6055     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6056       raise errors.OpPrereqError("File storage directory path not absolute",
6057                                  errors.ECODE_INVAL)
6058
6059     ### Node/iallocator related checks
6060     if [self.op.iallocator, self.op.pnode].count(None) != 1:
6061       raise errors.OpPrereqError("One and only one of iallocator and primary"
6062                                  " node must be given",
6063                                  errors.ECODE_INVAL)
6064
6065     if self.op.mode == constants.INSTANCE_IMPORT:
6066       # On import force_variant must be True, because if we forced it at
6067       # initial install, our only chance when importing it back is that it
6068       # works again!
6069       self.op.force_variant = True
6070
6071       if self.op.no_install:
6072         self.LogInfo("No-installation mode has no effect during import")
6073
6074     else: # INSTANCE_CREATE
6075       if getattr(self.op, "os_type", None) is None:
6076         raise errors.OpPrereqError("No guest OS specified",
6077                                    errors.ECODE_INVAL)
6078       self.op.force_variant = getattr(self.op, "force_variant", False)
6079       if self.op.disk_template is None:
6080         raise errors.OpPrereqError("No disk template specified",
6081                                    errors.ECODE_INVAL)
6082
6083   def ExpandNames(self):
6084     """ExpandNames for CreateInstance.
6085
6086     Figure out the right locks for instance creation.
6087
6088     """
6089     self.needed_locks = {}
6090
6091     instance_name = self.op.instance_name
6092     # this is just a preventive check, but someone might still add this
6093     # instance in the meantime, and creation will fail at lock-add time
6094     if instance_name in self.cfg.GetInstanceList():
6095       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6096                                  instance_name, errors.ECODE_EXISTS)
6097
6098     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6099
6100     if self.op.iallocator:
6101       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6102     else:
6103       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6104       nodelist = [self.op.pnode]
6105       if self.op.snode is not None:
6106         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6107         nodelist.append(self.op.snode)
6108       self.needed_locks[locking.LEVEL_NODE] = nodelist
6109
6110     # in case of import lock the source node too
6111     if self.op.mode == constants.INSTANCE_IMPORT:
6112       src_node = getattr(self.op, "src_node", None)
6113       src_path = getattr(self.op, "src_path", None)
6114
6115       if src_path is None:
6116         self.op.src_path = src_path = self.op.instance_name
6117
6118       if src_node is None:
6119         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6120         self.op.src_node = None
6121         if os.path.isabs(src_path):
6122           raise errors.OpPrereqError("Importing an instance from an absolute"
6123                                      " path requires a source node option.",
6124                                      errors.ECODE_INVAL)
6125       else:
6126         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6127         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6128           self.needed_locks[locking.LEVEL_NODE].append(src_node)
6129         if not os.path.isabs(src_path):
6130           self.op.src_path = src_path = \
6131             utils.PathJoin(constants.EXPORT_DIR, src_path)
6132
6133   def _RunAllocator(self):
6134     """Run the allocator based on input opcode.
6135
6136     """
6137     nics = [n.ToDict() for n in self.nics]
6138     ial = IAllocator(self.cfg, self.rpc,
6139                      mode=constants.IALLOCATOR_MODE_ALLOC,
6140                      name=self.op.instance_name,
6141                      disk_template=self.op.disk_template,
6142                      tags=[],
6143                      os=self.op.os_type,
6144                      vcpus=self.be_full[constants.BE_VCPUS],
6145                      mem_size=self.be_full[constants.BE_MEMORY],
6146                      disks=self.disks,
6147                      nics=nics,
6148                      hypervisor=self.op.hypervisor,
6149                      )
6150
6151     ial.Run(self.op.iallocator)
6152
6153     if not ial.success:
6154       raise errors.OpPrereqError("Can't compute nodes using"
6155                                  " iallocator '%s': %s" %
6156                                  (self.op.iallocator, ial.info),
6157                                  errors.ECODE_NORES)
6158     if len(ial.result) != ial.required_nodes:
6159       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6160                                  " of nodes (%s), required %s" %
6161                                  (self.op.iallocator, len(ial.result),
6162                                   ial.required_nodes), errors.ECODE_FAULT)
6163     self.op.pnode = ial.result[0]
6164     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6165                  self.op.instance_name, self.op.iallocator,
6166                  utils.CommaJoin(ial.result))
6167     if ial.required_nodes == 2:
6168       self.op.snode = ial.result[1]
6169
6170   def BuildHooksEnv(self):
6171     """Build hooks env.
6172
6173     This runs on master, primary and secondary nodes of the instance.
6174
6175     """
6176     env = {
6177       "ADD_MODE": self.op.mode,
6178       }
6179     if self.op.mode == constants.INSTANCE_IMPORT:
6180       env["SRC_NODE"] = self.op.src_node
6181       env["SRC_PATH"] = self.op.src_path
6182       env["SRC_IMAGES"] = self.src_images
6183
6184     env.update(_BuildInstanceHookEnv(
6185       name=self.op.instance_name,
6186       primary_node=self.op.pnode,
6187       secondary_nodes=self.secondaries,
6188       status=self.op.start,
6189       os_type=self.op.os_type,
6190       memory=self.be_full[constants.BE_MEMORY],
6191       vcpus=self.be_full[constants.BE_VCPUS],
6192       nics=_NICListToTuple(self, self.nics),
6193       disk_template=self.op.disk_template,
6194       disks=[(d["size"], d["mode"]) for d in self.disks],
6195       bep=self.be_full,
6196       hvp=self.hv_full,
6197       hypervisor_name=self.op.hypervisor,
6198     ))
6199
6200     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6201           self.secondaries)
6202     return env, nl, nl
6203
6204   def _ReadExportInfo(self):
6205     """Reads the export information from disk.
6206
6207     It will override the opcode source node and path with the actual
6208     information, if these two were not specified before.
6209
6210     @return: the export information
6211
6212     """
6213     assert self.op.mode == constants.INSTANCE_IMPORT
6214
6215     src_node = self.op.src_node
6216     src_path = self.op.src_path
6217
6218     if src_node is None:
6219       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6220       exp_list = self.rpc.call_export_list(locked_nodes)
6221       found = False
6222       for node in exp_list:
6223         if exp_list[node].fail_msg:
6224           continue
6225         if src_path in exp_list[node].payload:
6226           found = True
6227           self.op.src_node = src_node = node
6228           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6229                                                        src_path)
6230           break
6231       if not found:
6232         raise errors.OpPrereqError("No export found for relative path %s" %
6233                                     src_path, errors.ECODE_INVAL)
6234
6235     _CheckNodeOnline(self, src_node)
6236     result = self.rpc.call_export_info(src_node, src_path)
6237     result.Raise("No export or invalid export found in dir %s" % src_path)
6238
6239     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6240     if not export_info.has_section(constants.INISECT_EXP):
6241       raise errors.ProgrammerError("Corrupted export config",
6242                                    errors.ECODE_ENVIRON)
6243
6244     ei_version = export_info.get(constants.INISECT_EXP, "version")
6245     if (int(ei_version) != constants.EXPORT_VERSION):
6246       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6247                                  (ei_version, constants.EXPORT_VERSION),
6248                                  errors.ECODE_ENVIRON)
6249     return export_info
6250
6251   def _ReadExportParams(self, einfo):
6252     """Use export parameters as defaults.
6253
6254     In case the opcode doesn't specify (as in override) some instance
6255     parameters, then try to use them from the export information, if
6256     that declares them.
6257
6258     """
6259     if self.op.disk_template is None:
6260       if einfo.has_option(constants.INISECT_INS, "disk_template"):
6261         self.op.disk_template = einfo.get(constants.INISECT_INS,
6262                                           "disk_template")
6263       else:
6264         raise errors.OpPrereqError("No disk template specified and the export"
6265                                    " is missing the disk_template information",
6266                                    errors.ECODE_INVAL)
6267
6268     if not self.op.disks:
6269       if einfo.has_option(constants.INISECT_INS, "disk_count"):
6270         disks = []
6271         # TODO: import the disk iv_name too
6272         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6273           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6274           disks.append({"size": disk_sz})
6275         self.op.disks = disks
6276       else:
6277         raise errors.OpPrereqError("No disk info specified and the export"
6278                                    " is missing the disk information",
6279                                    errors.ECODE_INVAL)
6280
6281     if (not self.op.nics and
6282         einfo.has_option(constants.INISECT_INS, "nic_count")):
6283       nics = []
6284       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6285         ndict = {}
6286         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6287           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6288           ndict[name] = v
6289         nics.append(ndict)
6290       self.op.nics = nics
6291
6292     if (self.op.hypervisor is None and
6293         einfo.has_option(constants.INISECT_INS, "hypervisor")):
6294       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6295     if einfo.has_section(constants.INISECT_HYP):
6296       # use the export parameters but do not override the ones
6297       # specified by the user
6298       for name, value in einfo.items(constants.INISECT_HYP):
6299         if name not in self.op.hvparams:
6300           self.op.hvparams[name] = value
6301
6302     if einfo.has_section(constants.INISECT_BEP):
6303       # use the parameters, without overriding
6304       for name, value in einfo.items(constants.INISECT_BEP):
6305         if name not in self.op.beparams:
6306           self.op.beparams[name] = value
6307     else:
6308       # try to read the parameters old style, from the main section
6309       for name in constants.BES_PARAMETERS:
6310         if (name not in self.op.beparams and
6311             einfo.has_option(constants.INISECT_INS, name)):
6312           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6313
6314   def CheckPrereq(self):
6315     """Check prerequisites.
6316
6317     """
6318     if self.op.mode == constants.INSTANCE_IMPORT:
6319       export_info = self._ReadExportInfo()
6320       self._ReadExportParams(export_info)
6321
6322     _CheckDiskTemplate(self.op.disk_template)
6323
6324     if (not self.cfg.GetVGName() and
6325         self.op.disk_template not in constants.DTS_NOT_LVM):
6326       raise errors.OpPrereqError("Cluster does not support lvm-based"
6327                                  " instances", errors.ECODE_STATE)
6328
6329     if self.op.hypervisor is None:
6330       self.op.hypervisor = self.cfg.GetHypervisorType()
6331
6332     cluster = self.cfg.GetClusterInfo()
6333     enabled_hvs = cluster.enabled_hypervisors
6334     if self.op.hypervisor not in enabled_hvs:
6335       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6336                                  " cluster (%s)" % (self.op.hypervisor,
6337                                   ",".join(enabled_hvs)),
6338                                  errors.ECODE_STATE)
6339
6340     # check hypervisor parameter syntax (locally)
6341     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6342     filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
6343                                   self.op.hvparams)
6344     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6345     hv_type.CheckParameterSyntax(filled_hvp)
6346     self.hv_full = filled_hvp
6347     # check that we don't specify global parameters on an instance
6348     _CheckGlobalHvParams(self.op.hvparams)
6349
6350     # fill and remember the beparams dict
6351     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6352     self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6353                                     self.op.beparams)
6354
6355     # NIC buildup
6356     self.nics = []
6357     for idx, nic in enumerate(self.op.nics):
6358       nic_mode_req = nic.get("mode", None)
6359       nic_mode = nic_mode_req
6360       if nic_mode is None:
6361         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6362
6363       # in routed mode, for the first nic, the default ip is 'auto'
6364       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6365         default_ip_mode = constants.VALUE_AUTO
6366       else:
6367         default_ip_mode = constants.VALUE_NONE
6368
6369       # ip validity checks
6370       ip = nic.get("ip", default_ip_mode)
6371       if ip is None or ip.lower() == constants.VALUE_NONE:
6372         nic_ip = None
6373       elif ip.lower() == constants.VALUE_AUTO:
6374         if not self.op.name_check:
6375           raise errors.OpPrereqError("IP address set to auto but name checks"
6376                                      " have been skipped. Aborting.",
6377                                      errors.ECODE_INVAL)
6378         nic_ip = self.hostname1.ip
6379       else:
6380         if not utils.IsValidIP(ip):
6381           raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6382                                      " like a valid IP" % ip,
6383                                      errors.ECODE_INVAL)
6384         nic_ip = ip
6385
6386       # TODO: check the ip address for uniqueness
6387       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6388         raise errors.OpPrereqError("Routed nic mode requires an ip address",
6389                                    errors.ECODE_INVAL)
6390
6391       # MAC address verification
6392       mac = nic.get("mac", constants.VALUE_AUTO)
6393       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6394         mac = utils.NormalizeAndValidateMac(mac)
6395
6396         try:
6397           self.cfg.ReserveMAC(mac, self.proc.GetECId())
6398         except errors.ReservationError:
6399           raise errors.OpPrereqError("MAC address %s already in use"
6400                                      " in cluster" % mac,
6401                                      errors.ECODE_NOTUNIQUE)
6402
6403       # bridge verification
6404       bridge = nic.get("bridge", None)
6405       link = nic.get("link", None)
6406       if bridge and link:
6407         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6408                                    " at the same time", errors.ECODE_INVAL)
6409       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6410         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6411                                    errors.ECODE_INVAL)
6412       elif bridge:
6413         link = bridge
6414
6415       nicparams = {}
6416       if nic_mode_req:
6417         nicparams[constants.NIC_MODE] = nic_mode_req
6418       if link:
6419         nicparams[constants.NIC_LINK] = link
6420
6421       check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6422                                       nicparams)
6423       objects.NIC.CheckParameterSyntax(check_params)
6424       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6425
6426     # disk checks/pre-build
6427     self.disks = []
6428     for disk in self.op.disks:
6429       mode = disk.get("mode", constants.DISK_RDWR)
6430       if mode not in constants.DISK_ACCESS_SET:
6431         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6432                                    mode, errors.ECODE_INVAL)
6433       size = disk.get("size", None)
6434       if size is None:
6435         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6436       try:
6437         size = int(size)
6438       except (TypeError, ValueError):
6439         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6440                                    errors.ECODE_INVAL)
6441       new_disk = {"size": size, "mode": mode}
6442       if "adopt" in disk:
6443         new_disk["adopt"] = disk["adopt"]
6444       self.disks.append(new_disk)
6445
6446     if self.op.mode == constants.INSTANCE_IMPORT:
6447
6448       # Check that the new instance doesn't have less disks than the export
6449       instance_disks = len(self.disks)
6450       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6451       if instance_disks < export_disks:
6452         raise errors.OpPrereqError("Not enough disks to import."
6453                                    " (instance: %d, export: %d)" %
6454                                    (instance_disks, export_disks),
6455                                    errors.ECODE_INVAL)
6456
6457       self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
6458       disk_images = []
6459       for idx in range(export_disks):
6460         option = 'disk%d_dump' % idx
6461         if export_info.has_option(constants.INISECT_INS, option):
6462           # FIXME: are the old os-es, disk sizes, etc. useful?
6463           export_name = export_info.get(constants.INISECT_INS, option)
6464           image = utils.PathJoin(self.op.src_path, export_name)
6465           disk_images.append(image)
6466         else:
6467           disk_images.append(False)
6468
6469       self.src_images = disk_images
6470
6471       old_name = export_info.get(constants.INISECT_INS, 'name')
6472       try:
6473         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6474       except (TypeError, ValueError), err:
6475         raise errors.OpPrereqError("Invalid export file, nic_count is not"
6476                                    " an integer: %s" % str(err),
6477                                    errors.ECODE_STATE)
6478       if self.op.instance_name == old_name:
6479         for idx, nic in enumerate(self.nics):
6480           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6481             nic_mac_ini = 'nic%d_mac' % idx
6482             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6483
6484     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6485
6486     # ip ping checks (we use the same ip that was resolved in ExpandNames)
6487     if self.op.ip_check:
6488       if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6489         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6490                                    (self.check_ip, self.op.instance_name),
6491                                    errors.ECODE_NOTUNIQUE)
6492
6493     #### mac address generation
6494     # By generating here the mac address both the allocator and the hooks get
6495     # the real final mac address rather than the 'auto' or 'generate' value.
6496     # There is a race condition between the generation and the instance object
6497     # creation, which means that we know the mac is valid now, but we're not
6498     # sure it will be when we actually add the instance. If things go bad
6499     # adding the instance will abort because of a duplicate mac, and the
6500     # creation job will fail.
6501     for nic in self.nics:
6502       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6503         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6504
6505     #### allocator run
6506
6507     if self.op.iallocator is not None:
6508       self._RunAllocator()
6509
6510     #### node related checks
6511
6512     # check primary node
6513     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6514     assert self.pnode is not None, \
6515       "Cannot retrieve locked node %s" % self.op.pnode
6516     if pnode.offline:
6517       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6518                                  pnode.name, errors.ECODE_STATE)
6519     if pnode.drained:
6520       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6521                                  pnode.name, errors.ECODE_STATE)
6522
6523     self.secondaries = []
6524
6525     # mirror node verification
6526     if self.op.disk_template in constants.DTS_NET_MIRROR:
6527       if self.op.snode is None:
6528         raise errors.OpPrereqError("The networked disk templates need"
6529                                    " a mirror node", errors.ECODE_INVAL)
6530       if self.op.snode == pnode.name:
6531         raise errors.OpPrereqError("The secondary node cannot be the"
6532                                    " primary node.", errors.ECODE_INVAL)
6533       _CheckNodeOnline(self, self.op.snode)
6534       _CheckNodeNotDrained(self, self.op.snode)
6535       self.secondaries.append(self.op.snode)
6536
6537     nodenames = [pnode.name] + self.secondaries
6538
6539     req_size = _ComputeDiskSize(self.op.disk_template,
6540                                 self.disks)
6541
6542     # Check lv size requirements, if not adopting
6543     if req_size is not None and not self.adopt_disks:
6544       _CheckNodesFreeDisk(self, nodenames, req_size)
6545
6546     if self.adopt_disks: # instead, we must check the adoption data
6547       all_lvs = set([i["adopt"] for i in self.disks])
6548       if len(all_lvs) != len(self.disks):
6549         raise errors.OpPrereqError("Duplicate volume names given for adoption",
6550                                    errors.ECODE_INVAL)
6551       for lv_name in all_lvs:
6552         try:
6553           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6554         except errors.ReservationError:
6555           raise errors.OpPrereqError("LV named %s used by another instance" %
6556                                      lv_name, errors.ECODE_NOTUNIQUE)
6557
6558       node_lvs = self.rpc.call_lv_list([pnode.name],
6559                                        self.cfg.GetVGName())[pnode.name]
6560       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6561       node_lvs = node_lvs.payload
6562       delta = all_lvs.difference(node_lvs.keys())
6563       if delta:
6564         raise errors.OpPrereqError("Missing logical volume(s): %s" %
6565                                    utils.CommaJoin(delta),
6566                                    errors.ECODE_INVAL)
6567       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6568       if online_lvs:
6569         raise errors.OpPrereqError("Online logical volumes found, cannot"
6570                                    " adopt: %s" % utils.CommaJoin(online_lvs),
6571                                    errors.ECODE_STATE)
6572       # update the size of disk based on what is found
6573       for dsk in self.disks:
6574         dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6575
6576     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6577
6578     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6579
6580     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6581
6582     # memory check on primary node
6583     if self.op.start:
6584       _CheckNodeFreeMemory(self, self.pnode.name,
6585                            "creating instance %s" % self.op.instance_name,
6586                            self.be_full[constants.BE_MEMORY],
6587                            self.op.hypervisor)
6588
6589     self.dry_run_result = list(nodenames)
6590
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    The steps performed, in order, are:
      - allocate a network port if the hypervisor is listed in
        constants.HTS_REQ_PORT
      - generate the disk objects and build the (initially down)
        objects.Instance
      - either rename the adopted LVs to the generated names, or create
        the disks from scratch
      - add the instance to the configuration and release the node locks
        (for imports the source node lock is kept)
      - wait for/check the disk sync status, removing disks and instance
        again if the disks are reported as degraded
      - run the OS create or import scripts (skipped for diskless
        instances and for adopted disks)
      - if requested via self.op.start, mark the instance as up and
        start it

    @param feedback_fn: callable used to report progress messages
    @return: list built from the new instance's all_nodes attribute
    @raise errors.OpExecError: if the disks are found degraded after the
        sync check (RPC failures are reported through the Raise() method
        of the respective RPC result)
    @raise errors.ProgrammerError: if self.op.mode is neither
        INSTANCE_CREATE nor INSTANCE_IMPORT

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    # only hypervisors listed in HTS_REQ_PORT get a port allocated
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                      string_file_storage_dir, instance)


    # self.disks holds the size/mode (and optional adopt) dicts prepared
    # during the prerequisite checks
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    # the instance is created as administratively down; admin_up is only
    # switched on at the end, if self.op.start is set
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip (tmp_disks, self.disks):
        # remember the generated logical_id as rename target, then point
        # the temporary disk at the existing (to be adopted) volume name
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adoped LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          # the DRBD minors are released whether or not the disk removal
          # worked; the bare raise then propagates the failure
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      # the source node lock is kept (the import step below still
      # references self.op.src_node); every other node lock is released
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    # disk_abort becomes True when _WaitForSync returns a false value
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      # roll back: remove the disks and the just-added configuration entry
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # OS scripts are not run for diskless instances nor for adopted disks
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        # FIXME: pass debug option from opcode to backend
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name,
                                                         self.op.debug_level)
        # unlike the create case, an import failure is only logged as a
        # warning and does not abort the operation
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      # record the new admin state in the configuration before issuing
      # the start RPC
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
6736
6737
class LUConnectConsole(NoHooksLU):
  """Build the command for reaching an instance's console.

  This LU does not open the console itself; its result is the command
  line that has to be run on the master node in order to connect to
  the console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the instance lock via _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Fetches the instance object from the configuration (it must exist,
    as it is locked) and runs the node-online check on its primary node.

    """
    inst_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(inst_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % inst_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Compute the ssh command line for the instance's console.

    The instance must be present in the instance list reported by its
    primary node, otherwise errors.OpExecError is raised.

    """
    inst = self.instance
    pnode = inst.primary_node

    # ask the primary node which instances it currently runs
    running = self.rpc.call_instance_list([pnode], [inst.hypervisor])[pnode]
    running.Raise("Can't get node information from %s" % pnode)
    if inst.name not in running.payload:
      raise errors.OpExecError("Instance %s is not running." % inst.name)

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    hv_impl = hypervisor.GetHypervisor(inst.hypervisor)
    cluster_info = self.cfg.GetClusterInfo()
    # the filled hvparams/beparams are handed over as separate
    # arguments, so that the instance object is not modified (and the
    # defaults do not end up saved in the instance itself)
    filled_hv = cluster_info.FillHV(inst)
    filled_be = cluster_info.FillBE(inst)
    console_cmd = hv_impl.GetShellCommandForConsole(inst, filled_hv,
                                                    filled_be)

    # wrap the console command into an ssh invocation
    return self.ssh.BuildCmd(pnode, "root", console_cmd,
                             batch=True, tty=True)
6789
6790
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  The actual work is delegated to a single L{TLReplaceDisks} tasklet.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Fill in missing optional opcode fields and validate them.

    """
    optional_fields = (
      ("remote_node", None),
      ("iallocator", None),
      ("early_release", False),
      )
    for field_name, fallback in optional_fields:
      if not hasattr(self.op, field_name):
        setattr(self.op, field_name, fallback)

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and set up the replacement tasklet.

    """
    self._ExpandAndLockInstance()

    node_level = locking.LEVEL_NODE

    if self.op.iallocator is not None:
      self.needed_locks[node_level] = locking.ALL_SET

    elif self.op.remote_node is None:
      self.needed_locks[node_level] = []
      self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[node_level] = [self.op.remote_node]
      self.recalculate_locks[node_level] = constants.LOCKS_APPEND

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False,
                                   self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Declare the instance's node locks at the node level.

    Nothing is done when all node locks are already being requested.

    """
    if level != locking.LEVEL_NODE:
      return
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list contains the master, the instance's primary node and,
    if one was given, the new secondary node; the same list is returned
    for both the pre and the post phase.

    """
    inst = self.replacer.instance
    hook_env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": inst.secondary_nodes[0],
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, inst))

    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]
    new_secondary = self.op.remote_node
    if new_secondary is not None:
      hook_nodes.append(new_secondary)

    return hook_env, hook_nodes, hook_nodes
6865
6866
class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  One L{TLReplaceDisks} tasklet (in change-secondary mode) is created
  for every instance returned by _GetNodeSecondaryInstances.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Fill in missing optional opcode fields and validate them.

    """
    optional_fields = (
      ("remote_node", None),
      ("iallocator", None),
      ("early_release", False),
      )
    for field_name, fallback in optional_fields:
      if not hasattr(self.op, field_name):
        setattr(self.op, field_name, fallback)

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare node/instance locks and create the per-instance tasklets.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Node locks: everything when an iallocator picks the target,
    # otherwise the explicitly given new secondary
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is None:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    else:
      new_secondary = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = new_secondary

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [new_secondary]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # One disk-replacement tasklet per secondary instance on this node
    evac_names = []
    evac_tasklets = []

    for sec_inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", sec_inst.name)
      evac_names.append(sec_inst.name)
      evac_tasklets.append(TLReplaceDisks(self, sec_inst.name,
                                          constants.REPLACE_DISK_CHG,
                                          self.op.iallocator,
                                          self.op.remote_node, [],
                                          True, self.op.early_release))

    self.tasklets = evac_tasklets
    self.instance_names = evac_names

    # Instance locks for everything we are going to touch
    self.needed_locks[locking.LEVEL_INSTANCE] = evac_names

  def DeclareLocks(self, level):
    """Declare the instances' node locks at the node level.

    Nothing is done when all node locks are already being requested.

    """
    if level != locking.LEVEL_NODE:
      return
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list contains the master and, if one was given, the new
    secondary node; the same list is returned for both the pre and the
    post phase.

    """
    hook_env = {
      "NODE_NAME": self.op.node_name,
      }
    hook_nodes = [self.cfg.GetMasterNode()]

    new_secondary = self.op.remote_node
    if new_secondary is not None:
      hook_env["NEW_SECONDARY"] = new_secondary
      hook_nodes.append(new_secondary)

    return (hook_env, hook_nodes, hook_nodes)
6954
6955
6956 class TLReplaceDisks(Tasklet):
6957   """Replaces disks for an instance.
6958
6959   Note: Locking is not within the scope of this class.
6960
6961   """
6962   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6963                disks, delay_iallocator, early_release):
6964     """Initializes this class.
6965
6966     """
6967     Tasklet.__init__(self, lu)
6968
6969     # Parameters
6970     self.instance_name = instance_name
6971     self.mode = mode
6972     self.iallocator_name = iallocator_name
6973     self.remote_node = remote_node
6974     self.disks = disks
6975     self.delay_iallocator = delay_iallocator
6976     self.early_release = early_release
6977
6978     # Runtime data
6979     self.instance = None
6980     self.new_node = None
6981     self.target_node = None
6982     self.other_node = None
6983     self.remote_node_info = None
6984     self.node_secondary_ip = None
6985
6986   @staticmethod
6987   def CheckArguments(mode, remote_node, iallocator):
6988     """Helper function for users of this class.
6989
6990     """
6991     # check for valid parameter combination
6992     if mode == constants.REPLACE_DISK_CHG:
6993       if remote_node is None and iallocator is None:
6994         raise errors.OpPrereqError("When changing the secondary either an"
6995                                    " iallocator script must be used or the"
6996                                    " new node given", errors.ECODE_INVAL)
6997
6998       if remote_node is not None and iallocator is not None:
6999         raise errors.OpPrereqError("Give either the iallocator or the new"
7000                                    " secondary, not both", errors.ECODE_INVAL)
7001
7002     elif remote_node is not None or iallocator is not None:
7003       # Not replacing the secondary
7004       raise errors.OpPrereqError("The iallocator and new node options can"
7005                                  " only be used when changing the"
7006                                  " secondary node", errors.ECODE_INVAL)
7007
7008   @staticmethod
7009   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7010     """Compute a new secondary node using an IAllocator.
7011
7012     """
7013     ial = IAllocator(lu.cfg, lu.rpc,
7014                      mode=constants.IALLOCATOR_MODE_RELOC,
7015                      name=instance_name,
7016                      relocate_from=relocate_from)
7017
7018     ial.Run(iallocator_name)
7019
7020     if not ial.success:
7021       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7022                                  " %s" % (iallocator_name, ial.info),
7023                                  errors.ECODE_NORES)
7024
7025     if len(ial.result) != ial.required_nodes:
7026       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7027                                  " of nodes (%s), required %s" %
7028                                  (iallocator_name,
7029                                   len(ial.result), ial.required_nodes),
7030                                  errors.ECODE_FAULT)
7031
7032     remote_node_name = ial.result[0]
7033
7034     lu.LogInfo("Selected new secondary for instance '%s': %s",
7035                instance_name, remote_node_name)
7036
7037     return remote_node_name
7038
7039   def _FindFaultyDisks(self, node_name):
7040     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7041                                     node_name, True)
7042
7043   def CheckPrereq(self):
7044     """Check prerequisites.
7045
7046     This checks that the instance is in the cluster.
7047
7048     """
7049     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7050     assert instance is not None, \
7051       "Cannot retrieve locked instance %s" % self.instance_name
7052
7053     if instance.disk_template != constants.DT_DRBD8:
7054       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7055                                  " instances", errors.ECODE_INVAL)
7056
7057     if len(instance.secondary_nodes) != 1:
7058       raise errors.OpPrereqError("The instance has a strange layout,"
7059                                  " expected one secondary but found %d" %
7060                                  len(instance.secondary_nodes),
7061                                  errors.ECODE_FAULT)
7062
7063     if not self.delay_iallocator:
7064       self._CheckPrereq2()
7065
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    Besides validating the request, this method computes the runtime
    attributes used by the replacement: remote_node_info, target_node,
    other_node, new_node (only set when changing the secondary), the
    final list of disk indices in self.disks and the node_secondary_ip
    mapping.

    @raise errors.OpPrereqError: if the new secondary equals the current
        primary or secondary node, if disks were specified in automatic
        or change-secondary mode, or if in automatic mode faulty disks
        are found on both nodes
    @raise errors.ProgrammerError: for an unknown replacement mode

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # the new secondary is either the one given explicitly (may be None)
    # or the one computed by the iallocator
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    # an explicit disk list is only accepted when replacing on the
    # primary or on the (unchanged) secondary
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      # automatic mode: the disks to replace are the faulty ones, which
      # must all be on the same node
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # nothing is faulty; Exec returns early on an empty disk list
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        # note: the old secondary (target_node) is not part of the nodes
        # checked for being online below
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      # the return value is ignored; presumably FindDisk raises for an
      # invalid index -- TODO confirm against objects.Instance.FindDisk
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    # new_node is None unless the secondary is being changed, and in
    # automatic mode without faulty disks no node is set at all
    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip
7180
7181   def Exec(self, feedback_fn):
7182     """Execute disk replacement.
7183
7184     This dispatches the disk replacement to the appropriate handler.
7185
7186     """
7187     if self.delay_iallocator:
7188       self._CheckPrereq2()
7189
7190     if not self.disks:
7191       feedback_fn("No disks need replacement")
7192       return
7193
7194     feedback_fn("Replacing disk(s) %s for %s" %
7195                 (utils.CommaJoin(self.disks), self.instance.name))
7196
7197     activate_disks = (not self.instance.admin_up)
7198
7199     # Activate the instance disks if we're replacing them on a down instance
7200     if activate_disks:
7201       _StartInstanceDisks(self.lu, self.instance, True)
7202
7203     try:
7204       # Should we replace the secondary node?
7205       if self.new_node is not None:
7206         fn = self._ExecDrbd8Secondary
7207       else:
7208         fn = self._ExecDrbd8DiskOnly
7209
7210       return fn(feedback_fn)
7211
7212     finally:
7213       # Deactivate the instance disks if we're replacing them on a
7214       # down instance
7215       if activate_disks:
7216         _SafeShutdownInstanceDisks(self.lu, self.instance)
7217
7218   def _CheckVolumeGroup(self, nodes):
7219     self.lu.LogInfo("Checking volume groups")
7220
7221     vgname = self.cfg.GetVGName()
7222
7223     # Make sure volume group exists on all involved nodes
7224     results = self.rpc.call_vg_list(nodes)
7225     if not results:
7226       raise errors.OpExecError("Can't list volume groups on the nodes")
7227
7228     for node in nodes:
7229       res = results[node]
7230       res.Raise("Error checking node %s" % node)
7231       if vgname not in res.payload:
7232         raise errors.OpExecError("Volume group '%s' not found on node %s" %
7233                                  (vgname, node))
7234
7235   def _CheckDisksExistence(self, nodes):
7236     # Check disk existence
7237     for idx, dev in enumerate(self.instance.disks):
7238       if idx not in self.disks:
7239         continue
7240
7241       for node in nodes:
7242         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7243         self.cfg.SetDiskID(dev, node)
7244
7245         result = self.rpc.call_blockdev_find(node, dev)
7246
7247         msg = result.fail_msg
7248         if msg or not result.payload:
7249           if not msg:
7250             msg = "disk not found"
7251           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7252                                    (idx, node, msg))
7253
7254   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7255     for idx, dev in enumerate(self.instance.disks):
7256       if idx not in self.disks:
7257         continue
7258
7259       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7260                       (idx, node_name))
7261
7262       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7263                                    ldisk=ldisk):
7264         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7265                                  " replace disks for instance %s" %
7266                                  (node_name, self.instance.name))
7267
7268   def _CreateNewStorage(self, node_name):
7269     vgname = self.cfg.GetVGName()
7270     iv_names = {}
7271
7272     for idx, dev in enumerate(self.instance.disks):
7273       if idx not in self.disks:
7274         continue
7275
7276       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7277
7278       self.cfg.SetDiskID(dev, node_name)
7279
7280       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7281       names = _GenerateUniqueNames(self.lu, lv_names)
7282
7283       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7284                              logical_id=(vgname, names[0]))
7285       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7286                              logical_id=(vgname, names[1]))
7287
7288       new_lvs = [lv_data, lv_meta]
7289       old_lvs = dev.children
7290       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7291
7292       # we pass force_create=True to force the LVM creation
7293       for new_lv in new_lvs:
7294         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7295                         _GetInstanceInfoText(self.instance), False)
7296
7297     return iv_names
7298
7299   def _CheckDevices(self, node_name, iv_names):
7300     for name, (dev, _, _) in iv_names.iteritems():
7301       self.cfg.SetDiskID(dev, node_name)
7302
7303       result = self.rpc.call_blockdev_find(node_name, dev)
7304
7305       msg = result.fail_msg
7306       if msg or not result.payload:
7307         if not msg:
7308           msg = "disk not found"
7309         raise errors.OpExecError("Can't find DRBD device %s: %s" %
7310                                  (name, msg))
7311
7312       if result.payload.is_degraded:
7313         raise errors.OpExecError("DRBD device %s is degraded!" % name)
7314
7315   def _RemoveOldStorage(self, node_name, iv_names):
7316     for name, (_, old_lvs, _) in iv_names.iteritems():
7317       self.lu.LogInfo("Remove logical volumes for %s" % name)
7318
7319       for lv in old_lvs:
7320         self.cfg.SetDiskID(lv, node_name)
7321
7322         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7323         if msg:
7324           self.lu.LogWarning("Can't remove old LV: %s" % msg,
7325                              hint="remove unused LVs manually")
7326
7327   def _ReleaseNodeLock(self, node_name):
7328     """Releases the lock for a given node."""
7329     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7330
7331   def _ExecDrbd8DiskOnly(self, feedback_fn):
7332     """Replace a disk on the primary or secondary for DRBD 8.
7333
7334     The algorithm for replace is quite complicated:
7335
7336       1. for each disk to be replaced:
7337
7338         1. create new LVs on the target node with unique names
7339         1. detach old LVs from the drbd device
7340         1. rename old LVs to name_replaced.<time_t>
7341         1. rename new LVs to old LVs
7342         1. attach the new LVs (with the old names now) to the drbd device
7343
7344       1. wait for sync across all devices
7345
7346       1. for each modified disk:
7347
7348         1. remove old LVs (which have the name name_replaces.<time_t>)
7349
7350     Failures are not very well handled.
7351
7352     """
7353     steps_total = 6
7354
7355     # Step: check device activation
7356     self.lu.LogStep(1, steps_total, "Check device existence")
7357     self._CheckDisksExistence([self.other_node, self.target_node])
7358     self._CheckVolumeGroup([self.target_node, self.other_node])
7359
7360     # Step: check other node consistency
7361     self.lu.LogStep(2, steps_total, "Check peer consistency")
7362     self._CheckDisksConsistency(self.other_node,
7363                                 self.other_node == self.instance.primary_node,
7364                                 False)
7365
7366     # Step: create new storage
7367     self.lu.LogStep(3, steps_total, "Allocate new storage")
7368     iv_names = self._CreateNewStorage(self.target_node)
7369
7370     # Step: for each lv, detach+rename*2+attach
7371     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7372     for dev, old_lvs, new_lvs in iv_names.itervalues():
7373       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7374
7375       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7376                                                      old_lvs)
7377       result.Raise("Can't detach drbd from local storage on node"
7378                    " %s for device %s" % (self.target_node, dev.iv_name))
7379       #dev.children = []
7380       #cfg.Update(instance)
7381
7382       # ok, we created the new LVs, so now we know we have the needed
7383       # storage; as such, we proceed on the target node to rename
7384       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7385       # using the assumption that logical_id == physical_id (which in
7386       # turn is the unique_id on that node)
7387
7388       # FIXME(iustin): use a better name for the replaced LVs
7389       temp_suffix = int(time.time())
7390       ren_fn = lambda d, suff: (d.physical_id[0],
7391                                 d.physical_id[1] + "_replaced-%s" % suff)
7392
7393       # Build the rename list based on what LVs exist on the node
7394       rename_old_to_new = []
7395       for to_ren in old_lvs:
7396         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7397         if not result.fail_msg and result.payload:
7398           # device exists
7399           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7400
7401       self.lu.LogInfo("Renaming the old LVs on the target node")
7402       result = self.rpc.call_blockdev_rename(self.target_node,
7403                                              rename_old_to_new)
7404       result.Raise("Can't rename old LVs on node %s" % self.target_node)
7405
7406       # Now we rename the new LVs to the old LVs
7407       self.lu.LogInfo("Renaming the new LVs on the target node")
7408       rename_new_to_old = [(new, old.physical_id)
7409                            for old, new in zip(old_lvs, new_lvs)]
7410       result = self.rpc.call_blockdev_rename(self.target_node,
7411                                              rename_new_to_old)
7412       result.Raise("Can't rename new LVs on node %s" % self.target_node)
7413
7414       for old, new in zip(old_lvs, new_lvs):
7415         new.logical_id = old.logical_id
7416         self.cfg.SetDiskID(new, self.target_node)
7417
7418       for disk in old_lvs:
7419         disk.logical_id = ren_fn(disk, temp_suffix)
7420         self.cfg.SetDiskID(disk, self.target_node)
7421
7422       # Now that the new lvs have the old name, we can add them to the device
7423       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7424       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7425                                                   new_lvs)
7426       msg = result.fail_msg
7427       if msg:
7428         for new_lv in new_lvs:
7429           msg2 = self.rpc.call_blockdev_remove(self.target_node,
7430                                                new_lv).fail_msg
7431           if msg2:
7432             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7433                                hint=("cleanup manually the unused logical"
7434                                      "volumes"))
7435         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7436
7437       dev.children = new_lvs
7438
7439       self.cfg.Update(self.instance, feedback_fn)
7440
7441     cstep = 5
7442     if self.early_release:
7443       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7444       cstep += 1
7445       self._RemoveOldStorage(self.target_node, iv_names)
7446       # WARNING: we release both node locks here, do not do other RPCs
7447       # than WaitForSync to the primary node
7448       self._ReleaseNodeLock([self.target_node, self.other_node])
7449
7450     # Wait for sync
7451     # This can fail as the old devices are degraded and _WaitForSync
7452     # does a combined result over all disks, so we don't check its return value
7453     self.lu.LogStep(cstep, steps_total, "Sync devices")
7454     cstep += 1
7455     _WaitForSync(self.lu, self.instance)
7456
7457     # Check all devices manually
7458     self._CheckDevices(self.instance.primary_node, iv_names)
7459
7460     # Step: remove old storage
7461     if not self.early_release:
7462       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7463       cstep += 1
7464       self._RemoveOldStorage(self.target_node, iv_names)
7465
7466   def _ExecDrbd8Secondary(self, feedback_fn):
7467     """Replace the secondary node for DRBD 8.
7468
7469     The algorithm for replace is quite complicated:
7470       - for all disks of the instance:
7471         - create new LVs on the new node with same names
7472         - shutdown the drbd device on the old secondary
7473         - disconnect the drbd network on the primary
7474         - create the drbd device on the new secondary
7475         - network attach the drbd on the primary, using an artifice:
7476           the drbd code for Attach() will connect to the network if it
7477           finds a device which is connected to the good local disks but
7478           not network enabled
7479       - wait for sync across all devices
7480       - remove all disks from the old secondary
7481
7482     Failures are not very well handled.
7483
7484     """
7485     steps_total = 6
7486
7487     # Step: check device activation
7488     self.lu.LogStep(1, steps_total, "Check device existence")
7489     self._CheckDisksExistence([self.instance.primary_node])
7490     self._CheckVolumeGroup([self.instance.primary_node])
7491
7492     # Step: check other node consistency
7493     self.lu.LogStep(2, steps_total, "Check peer consistency")
7494     self._CheckDisksConsistency(self.instance.primary_node, True, True)
7495
7496     # Step: create new storage
7497     self.lu.LogStep(3, steps_total, "Allocate new storage")
7498     for idx, dev in enumerate(self.instance.disks):
7499       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7500                       (self.new_node, idx))
7501       # we pass force_create=True to force LVM creation
7502       for new_lv in dev.children:
7503         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7504                         _GetInstanceInfoText(self.instance), False)
7505
7506     # Step 4: dbrd minors and drbd setups changes
7507     # after this, we must manually remove the drbd minors on both the
7508     # error and the success paths
7509     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7510     minors = self.cfg.AllocateDRBDMinor([self.new_node
7511                                          for dev in self.instance.disks],
7512                                         self.instance.name)
7513     logging.debug("Allocated minors %r", minors)
7514
7515     iv_names = {}
7516     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7517       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7518                       (self.new_node, idx))
7519       # create new devices on new_node; note that we create two IDs:
7520       # one without port, so the drbd will be activated without
7521       # networking information on the new node at this stage, and one
7522       # with network, for the latter activation in step 4
7523       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7524       if self.instance.primary_node == o_node1:
7525         p_minor = o_minor1
7526       else:
7527         assert self.instance.primary_node == o_node2, "Three-node instance?"
7528         p_minor = o_minor2
7529
7530       new_alone_id = (self.instance.primary_node, self.new_node, None,
7531                       p_minor, new_minor, o_secret)
7532       new_net_id = (self.instance.primary_node, self.new_node, o_port,
7533                     p_minor, new_minor, o_secret)
7534
7535       iv_names[idx] = (dev, dev.children, new_net_id)
7536       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7537                     new_net_id)
7538       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7539                               logical_id=new_alone_id,
7540                               children=dev.children,
7541                               size=dev.size)
7542       try:
7543         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7544                               _GetInstanceInfoText(self.instance), False)
7545       except errors.GenericError:
7546         self.cfg.ReleaseDRBDMinors(self.instance.name)
7547         raise
7548
7549     # We have new devices, shutdown the drbd on the old secondary
7550     for idx, dev in enumerate(self.instance.disks):
7551       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7552       self.cfg.SetDiskID(dev, self.target_node)
7553       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7554       if msg:
7555         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7556                            "node: %s" % (idx, msg),
7557                            hint=("Please cleanup this device manually as"
7558                                  " soon as possible"))
7559
7560     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7561     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7562                                                self.node_secondary_ip,
7563                                                self.instance.disks)\
7564                                               [self.instance.primary_node]
7565
7566     msg = result.fail_msg
7567     if msg:
7568       # detaches didn't succeed (unlikely)
7569       self.cfg.ReleaseDRBDMinors(self.instance.name)
7570       raise errors.OpExecError("Can't detach the disks from the network on"
7571                                " old node: %s" % (msg,))
7572
7573     # if we managed to detach at least one, we update all the disks of
7574     # the instance to point to the new secondary
7575     self.lu.LogInfo("Updating instance configuration")
7576     for dev, _, new_logical_id in iv_names.itervalues():
7577       dev.logical_id = new_logical_id
7578       self.cfg.SetDiskID(dev, self.instance.primary_node)
7579
7580     self.cfg.Update(self.instance, feedback_fn)
7581
7582     # and now perform the drbd attach
7583     self.lu.LogInfo("Attaching primary drbds to new secondary"
7584                     " (standalone => connected)")
7585     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7586                                             self.new_node],
7587                                            self.node_secondary_ip,
7588                                            self.instance.disks,
7589                                            self.instance.name,
7590                                            False)
7591     for to_node, to_result in result.items():
7592       msg = to_result.fail_msg
7593       if msg:
7594         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7595                            to_node, msg,
7596                            hint=("please do a gnt-instance info to see the"
7597                                  " status of disks"))
7598     cstep = 5
7599     if self.early_release:
7600       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7601       cstep += 1
7602       self._RemoveOldStorage(self.target_node, iv_names)
7603       # WARNING: we release all node locks here, do not do other RPCs
7604       # than WaitForSync to the primary node
7605       self._ReleaseNodeLock([self.instance.primary_node,
7606                              self.target_node,
7607                              self.new_node])
7608
7609     # Wait for sync
7610     # This can fail as the old devices are degraded and _WaitForSync
7611     # does a combined result over all disks, so we don't check its return value
7612     self.lu.LogStep(cstep, steps_total, "Sync devices")
7613     cstep += 1
7614     _WaitForSync(self.lu, self.instance)
7615
7616     # Check all devices manually
7617     self._CheckDevices(self.instance.primary_node, iv_names)
7618
7619     # Step: remove old storage
7620     if not self.early_release:
7621       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7622       self._RemoveOldStorage(self.target_node, iv_names)
7623
7624
7625 class LURepairNodeStorage(NoHooksLU):
7626   """Repairs the volume group on a node.
7627
7628   """
7629   _OP_REQP = ["node_name"]
7630   REQ_BGL = False
7631
7632   def CheckArguments(self):
7633     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7634
7635   def ExpandNames(self):
7636     self.needed_locks = {
7637       locking.LEVEL_NODE: [self.op.node_name],
7638       }
7639
7640   def _CheckFaultyDisks(self, instance, node_name):
7641     """Ensure faulty disks abort the opcode or at least warn."""
7642     try:
7643       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7644                                   node_name, True):
7645         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7646                                    " node '%s'" % (instance.name, node_name),
7647                                    errors.ECODE_STATE)
7648     except errors.OpPrereqError, err:
7649       if self.op.ignore_consistency:
7650         self.proc.LogWarning(str(err.args[0]))
7651       else:
7652         raise
7653
7654   def CheckPrereq(self):
7655     """Check prerequisites.
7656
7657     """
7658     storage_type = self.op.storage_type
7659
7660     if (constants.SO_FIX_CONSISTENCY not in
7661         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7662       raise errors.OpPrereqError("Storage units of type '%s' can not be"
7663                                  " repaired" % storage_type,
7664                                  errors.ECODE_INVAL)
7665
7666     # Check whether any instance on this node has faulty disks
7667     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7668       if not inst.admin_up:
7669         continue
7670       check_nodes = set(inst.all_nodes)
7671       check_nodes.discard(self.op.node_name)
7672       for inst_node_name in check_nodes:
7673         self._CheckFaultyDisks(inst, inst_node_name)
7674
7675   def Exec(self, feedback_fn):
7676     feedback_fn("Repairing storage unit '%s' on %s ..." %
7677                 (self.op.name, self.op.node_name))
7678
7679     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7680     result = self.rpc.call_storage_execute(self.op.node_name,
7681                                            self.op.storage_type, st_args,
7682                                            self.op.name,
7683                                            constants.SO_FIX_CONSISTENCY)
7684     result.Raise("Failed to repair storage unit '%s' on %s" %
7685                  (self.op.name, self.op.node_name))
7686
7687
class LUNodeEvacuationStrategy(NoHooksLU):
  """Logical unit computing how to evacuate a set of nodes.

  """
  _OP_REQP = ["nodes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional opcode fields and reject conflicting ones.

    @raise errors.OpPrereqError: if both a remote node and an iallocator
        are given

    """
    # Both fields are optional in the opcode; make sure they exist
    for optional in ("remote_node", "iallocator"):
      if not hasattr(self.op, optional):
        setattr(self.op, optional, None)

    have_node = self.op.remote_node is not None
    have_iallocator = self.op.iallocator is not None
    if have_node and have_iallocator:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    """Expand the node names and declare the node locks.

    Without a remote node all node locks are requested; otherwise only
    those of the evacuated nodes and of the remote node.

    """
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = {}
    if self.op.remote_node is None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      return

    self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
    self.needed_locks[locking.LEVEL_NODE] = \
      self.op.nodes + [self.op.remote_node]

  def CheckPrereq(self):
    """No prerequisites are checked for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the evacuation plan.

    @param feedback_fn: not used by this method
    @return: with a remote node, a list of [instance name, remote node]
        pairs; otherwise the result computed by the iallocator

    """
    if self.op.remote_node is None:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      return ial.result

    # Collect the secondary instances of all nodes before validating them
    secondaries = [inst
                   for node in self.op.nodes
                   for inst in _GetNodeSecondaryInstances(self.cfg, node)]

    plan = []
    for inst in secondaries:
      if inst.primary_node == self.op.remote_node:
        raise errors.OpPrereqError("Node %s is the primary node of"
                                   " instance %s, cannot use it as"
                                   " secondary" %
                                   (self.op.remote_node, inst.name),
                                   errors.ECODE_INVAL)
      plan.append([inst.name, self.op.remote_node])
    return plan
7740
7741
class LUGrowDisk(LogicalUnit):
  """Logical unit growing one disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """At the node level, lock the nodes of the locked instance.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    @return: tuple of (environment dict, node list, node list); the same
        node list is used in both positions

    """
    env = {}
    env["DISK"] = self.op.disk
    env["AMOUNT"] = self.op.amount
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that all its nodes
    are online, that its disk template can be grown and that the disk
    exists; for non-file templates the free disk space is checked too.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    inst_nodes = list(inst.all_nodes)
    for node_name in inst_nodes:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    growable = inst.disk_template in constants.DTS_GROWABLE
    if not growable:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    if inst.disk_template == constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      return

    _CheckNodesFreeDisk(self, inst_nodes, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    @param feedback_fn: feedback callable, passed on to the configuration
        update

    """
    target_disk = self.disk
    for node_name in self.instance.all_nodes:
      self.cfg.SetDiskID(target_disk, node_name)
      grow = self.rpc.call_blockdev_grow(node_name, target_disk,
                                         self.op.amount)
      grow.Raise("Grow request failed to node %s" % node_name)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    # Record the new size in the configuration once all nodes were resized
    target_disk.RecordGrow(self.op.amount)
    self.cfg.Update(self.instance, feedback_fn)

    if not self.op.wait_for_sync:
      return

    if not _WaitForSync(self, self.instance):
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")
7826
7827
7828 class LUQueryInstanceData(NoHooksLU):
7829   """Query runtime instance data.
7830
7831   """
7832   _OP_REQP = ["instances", "static"]
7833   REQ_BGL = False
7834
7835   def ExpandNames(self):
7836     self.needed_locks = {}
7837     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7838
7839     if not isinstance(self.op.instances, list):
7840       raise errors.OpPrereqError("Invalid argument type 'instances'",
7841                                  errors.ECODE_INVAL)
7842
7843     if self.op.instances:
7844       self.wanted_names = []
7845       for name in self.op.instances:
7846         full_name = _ExpandInstanceName(self.cfg, name)
7847         self.wanted_names.append(full_name)
7848       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7849     else:
7850       self.wanted_names = None
7851       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7852
7853     self.needed_locks[locking.LEVEL_NODE] = []
7854     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7855
7856   def DeclareLocks(self, level):
7857     if level == locking.LEVEL_NODE:
7858       self._LockInstancesNodes()
7859
7860   def CheckPrereq(self):
7861     """Check prerequisites.
7862
7863     This only checks the optional instance list against the existing names.
7864
7865     """
7866     if self.wanted_names is None:
7867       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7868
7869     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7870                              in self.wanted_names]
7871     return
7872
7873   def _ComputeBlockdevStatus(self, node, instance_name, dev):
7874     """Returns the status of a block device
7875
7876     """
7877     if self.op.static or not node:
7878       return None
7879
7880     self.cfg.SetDiskID(dev, node)
7881
7882     result = self.rpc.call_blockdev_find(node, dev)
7883     if result.offline:
7884       return None
7885
7886     result.Raise("Can't compute disk status for %s" % instance_name)
7887
7888     status = result.payload
7889     if status is None:
7890       return None
7891
7892     return (status.dev_path, status.major, status.minor,
7893             status.sync_percent, status.estimated_time,
7894             status.is_degraded, status.ldisk_status)
7895
7896   def _ComputeDiskStatus(self, instance, snode, dev):
7897     """Compute block device status.
7898
7899     """
7900     if dev.dev_type in constants.LDS_DRBD:
7901       # we change the snode then (otherwise we use the one passed in)
7902       if dev.logical_id[0] == instance.primary_node:
7903         snode = dev.logical_id[1]
7904       else:
7905         snode = dev.logical_id[0]
7906
7907     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7908                                               instance.name, dev)
7909     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7910
7911     if dev.children:
7912       dev_children = [self._ComputeDiskStatus(instance, snode, child)
7913                       for child in dev.children]
7914     else:
7915       dev_children = []
7916
7917     data = {
7918       "iv_name": dev.iv_name,
7919       "dev_type": dev.dev_type,
7920       "logical_id": dev.logical_id,
7921       "physical_id": dev.physical_id,
7922       "pstatus": dev_pstatus,
7923       "sstatus": dev_sstatus,
7924       "children": dev_children,
7925       "mode": dev.mode,
7926       "size": dev.size,
7927       }
7928
7929     return data
7930
7931   def Exec(self, feedback_fn):
7932     """Gather and return data"""
7933     result = {}
7934
7935     cluster = self.cfg.GetClusterInfo()
7936
7937     for instance in self.wanted_instances:
7938       if not self.op.static:
7939         remote_info = self.rpc.call_instance_info(instance.primary_node,
7940                                                   instance.name,
7941                                                   instance.hypervisor)
7942         remote_info.Raise("Error checking node %s" % instance.primary_node)
7943         remote_info = remote_info.payload
7944         if remote_info and "state" in remote_info:
7945           remote_state = "up"
7946         else:
7947           remote_state = "down"
7948       else:
7949         remote_state = None
7950       if instance.admin_up:
7951         config_state = "up"
7952       else:
7953         config_state = "down"
7954
7955       disks = [self._ComputeDiskStatus(instance, None, device)
7956                for device in instance.disks]
7957
7958       idict = {
7959         "name": instance.name,
7960         "config_state": config_state,
7961         "run_state": remote_state,
7962         "pnode": instance.primary_node,
7963         "snodes": instance.secondary_nodes,
7964         "os": instance.os,
7965         # this happens to be the same format used for hooks
7966         "nics": _NICListToTuple(self, instance.nics),
7967         "disks": disks,
7968         "hypervisor": instance.hypervisor,
7969         "network_port": instance.network_port,
7970         "hv_instance": instance.hvparams,
7971         "hv_actual": cluster.FillHV(instance, skip_globals=True),
7972         "be_instance": instance.beparams,
7973         "be_actual": cluster.FillBE(instance),
7974         "serial_no": instance.serial_no,
7975         "mtime": instance.mtime,
7976         "ctime": instance.ctime,
7977         "uuid": instance.uuid,
7978         }
7979
7980       result[instance.name] = idict
7981
7982     return result
7983
7984
7985 class LUSetInstanceParams(LogicalUnit):
7986   """Modifies an instances's parameters.
7987
7988   """
7989   HPATH = "instance-modify"
7990   HTYPE = constants.HTYPE_INSTANCE
7991   _OP_REQP = ["instance_name"]
7992   REQ_BGL = False
7993
7994   def CheckArguments(self):
7995     if not hasattr(self.op, 'nics'):
7996       self.op.nics = []
7997     if not hasattr(self.op, 'disks'):
7998       self.op.disks = []
7999     if not hasattr(self.op, 'beparams'):
8000       self.op.beparams = {}
8001     if not hasattr(self.op, 'hvparams'):
8002       self.op.hvparams = {}
8003     if not hasattr(self.op, "disk_template"):
8004       self.op.disk_template = None
8005     if not hasattr(self.op, "remote_node"):
8006       self.op.remote_node = None
8007     if not hasattr(self.op, "os_name"):
8008       self.op.os_name = None
8009     if not hasattr(self.op, "force_variant"):
8010       self.op.force_variant = False
8011     self.op.force = getattr(self.op, "force", False)
8012     if not (self.op.nics or self.op.disks or self.op.disk_template or
8013             self.op.hvparams or self.op.beparams or self.op.os_name):
8014       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8015
8016     if self.op.hvparams:
8017       _CheckGlobalHvParams(self.op.hvparams)
8018
8019     # Disk validation
8020     disk_addremove = 0
8021     for disk_op, disk_dict in self.op.disks:
8022       if disk_op == constants.DDM_REMOVE:
8023         disk_addremove += 1
8024         continue
8025       elif disk_op == constants.DDM_ADD:
8026         disk_addremove += 1
8027       else:
8028         if not isinstance(disk_op, int):
8029           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8030         if not isinstance(disk_dict, dict):
8031           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8032           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8033
8034       if disk_op == constants.DDM_ADD:
8035         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8036         if mode not in constants.DISK_ACCESS_SET:
8037           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8038                                      errors.ECODE_INVAL)
8039         size = disk_dict.get('size', None)
8040         if size is None:
8041           raise errors.OpPrereqError("Required disk parameter size missing",
8042                                      errors.ECODE_INVAL)
8043         try:
8044           size = int(size)
8045         except (TypeError, ValueError), err:
8046           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8047                                      str(err), errors.ECODE_INVAL)
8048         disk_dict['size'] = size
8049       else:
8050         # modification of disk
8051         if 'size' in disk_dict:
8052           raise errors.OpPrereqError("Disk size change not possible, use"
8053                                      " grow-disk", errors.ECODE_INVAL)
8054
8055     if disk_addremove > 1:
8056       raise errors.OpPrereqError("Only one disk add or remove operation"
8057                                  " supported at a time", errors.ECODE_INVAL)
8058
8059     if self.op.disks and self.op.disk_template is not None:
8060       raise errors.OpPrereqError("Disk template conversion and other disk"
8061                                  " changes not supported at the same time",
8062                                  errors.ECODE_INVAL)
8063
8064     if self.op.disk_template:
8065       _CheckDiskTemplate(self.op.disk_template)
8066       if (self.op.disk_template in constants.DTS_NET_MIRROR and
8067           self.op.remote_node is None):
8068         raise errors.OpPrereqError("Changing the disk template to a mirrored"
8069                                    " one requires specifying a secondary node",
8070                                    errors.ECODE_INVAL)
8071
8072     # NIC validation
8073     nic_addremove = 0
8074     for nic_op, nic_dict in self.op.nics:
8075       if nic_op == constants.DDM_REMOVE:
8076         nic_addremove += 1
8077         continue
8078       elif nic_op == constants.DDM_ADD:
8079         nic_addremove += 1
8080       else:
8081         if not isinstance(nic_op, int):
8082           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8083         if not isinstance(nic_dict, dict):
8084           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8085           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8086
8087       # nic_dict should be a dict
8088       nic_ip = nic_dict.get('ip', None)
8089       if nic_ip is not None:
8090         if nic_ip.lower() == constants.VALUE_NONE:
8091           nic_dict['ip'] = None
8092         else:
8093           if not utils.IsValidIP(nic_ip):
8094             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8095                                        errors.ECODE_INVAL)
8096
8097       nic_bridge = nic_dict.get('bridge', None)
8098       nic_link = nic_dict.get('link', None)
8099       if nic_bridge and nic_link:
8100         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8101                                    " at the same time", errors.ECODE_INVAL)
8102       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8103         nic_dict['bridge'] = None
8104       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8105         nic_dict['link'] = None
8106
8107       if nic_op == constants.DDM_ADD:
8108         nic_mac = nic_dict.get('mac', None)
8109         if nic_mac is None:
8110           nic_dict['mac'] = constants.VALUE_AUTO
8111
8112       if 'mac' in nic_dict:
8113         nic_mac = nic_dict['mac']
8114         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8115           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8116
8117         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8118           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8119                                      " modifying an existing nic",
8120                                      errors.ECODE_INVAL)
8121
8122     if nic_addremove > 1:
8123       raise errors.OpPrereqError("Only one NIC add or remove operation"
8124                                  " supported at a time", errors.ECODE_INVAL)
8125
8126   def ExpandNames(self):
8127     self._ExpandAndLockInstance()
8128     self.needed_locks[locking.LEVEL_NODE] = []
8129     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8130
8131   def DeclareLocks(self, level):
8132     if level == locking.LEVEL_NODE:
8133       self._LockInstancesNodes()
8134       if self.op.disk_template and self.op.remote_node:
8135         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8136         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8137
8138   def BuildHooksEnv(self):
8139     """Build hooks env.
8140
8141     This runs on the master, primary and secondaries.
8142
8143     """
8144     args = dict()
8145     if constants.BE_MEMORY in self.be_new:
8146       args['memory'] = self.be_new[constants.BE_MEMORY]
8147     if constants.BE_VCPUS in self.be_new:
8148       args['vcpus'] = self.be_new[constants.BE_VCPUS]
8149     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8150     # information at all.
8151     if self.op.nics:
8152       args['nics'] = []
8153       nic_override = dict(self.op.nics)
8154       c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8155       for idx, nic in enumerate(self.instance.nics):
8156         if idx in nic_override:
8157           this_nic_override = nic_override[idx]
8158         else:
8159           this_nic_override = {}
8160         if 'ip' in this_nic_override:
8161           ip = this_nic_override['ip']
8162         else:
8163           ip = nic.ip
8164         if 'mac' in this_nic_override:
8165           mac = this_nic_override['mac']
8166         else:
8167           mac = nic.mac
8168         if idx in self.nic_pnew:
8169           nicparams = self.nic_pnew[idx]
8170         else:
8171           nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8172         mode = nicparams[constants.NIC_MODE]
8173         link = nicparams[constants.NIC_LINK]
8174         args['nics'].append((ip, mac, mode, link))
8175       if constants.DDM_ADD in nic_override:
8176         ip = nic_override[constants.DDM_ADD].get('ip', None)
8177         mac = nic_override[constants.DDM_ADD]['mac']
8178         nicparams = self.nic_pnew[constants.DDM_ADD]
8179         mode = nicparams[constants.NIC_MODE]
8180         link = nicparams[constants.NIC_LINK]
8181         args['nics'].append((ip, mac, mode, link))
8182       elif constants.DDM_REMOVE in nic_override:
8183         del args['nics'][-1]
8184
8185     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8186     if self.op.disk_template:
8187       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8188     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8189     return env, nl, nl
8190
8191   @staticmethod
8192   def _GetUpdatedParams(old_params, update_dict,
8193                         default_values, parameter_types):
8194     """Return the new params dict for the given params.
8195
8196     @type old_params: dict
8197     @param old_params: old parameters
8198     @type update_dict: dict
8199     @param update_dict: dict containing new parameter values,
8200                         or constants.VALUE_DEFAULT to reset the
8201                         parameter to its default value
8202     @type default_values: dict
8203     @param default_values: default values for the filled parameters
8204     @type parameter_types: dict
8205     @param parameter_types: dict mapping target dict keys to types
8206                             in constants.ENFORCEABLE_TYPES
8207     @rtype: (dict, dict)
8208     @return: (new_parameters, filled_parameters)
8209
8210     """
8211     params_copy = copy.deepcopy(old_params)
8212     for key, val in update_dict.iteritems():
8213       if val == constants.VALUE_DEFAULT:
8214         try:
8215           del params_copy[key]
8216         except KeyError:
8217           pass
8218       else:
8219         params_copy[key] = val
8220     utils.ForceDictType(params_copy, parameter_types)
8221     params_filled = objects.FillDict(default_values, params_copy)
8222     return (params_copy, params_filled)
8223
8224   def CheckPrereq(self):
8225     """Check prerequisites.
8226
8227     This only checks the instance list against the existing names.
8228
8229     """
8230     self.force = self.op.force
8231
8232     # checking the new params on the primary/secondary nodes
8233
8234     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8235     cluster = self.cluster = self.cfg.GetClusterInfo()
8236     assert self.instance is not None, \
8237       "Cannot retrieve locked instance %s" % self.op.instance_name
8238     pnode = instance.primary_node
8239     nodelist = list(instance.all_nodes)
8240
8241     if self.op.disk_template:
8242       if instance.disk_template == self.op.disk_template:
8243         raise errors.OpPrereqError("Instance already has disk template %s" %
8244                                    instance.disk_template, errors.ECODE_INVAL)
8245
8246       if (instance.disk_template,
8247           self.op.disk_template) not in self._DISK_CONVERSIONS:
8248         raise errors.OpPrereqError("Unsupported disk template conversion from"
8249                                    " %s to %s" % (instance.disk_template,
8250                                                   self.op.disk_template),
8251                                    errors.ECODE_INVAL)
8252       if self.op.disk_template in constants.DTS_NET_MIRROR:
8253         _CheckNodeOnline(self, self.op.remote_node)
8254         _CheckNodeNotDrained(self, self.op.remote_node)
8255         disks = [{"size": d.size} for d in instance.disks]
8256         required = _ComputeDiskSize(self.op.disk_template, disks)
8257         _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8258         _CheckInstanceDown(self, instance, "cannot change disk template")
8259
8260     # hvparams processing
8261     if self.op.hvparams:
8262       i_hvdict, hv_new = self._GetUpdatedParams(
8263                              instance.hvparams, self.op.hvparams,
8264                              cluster.hvparams[instance.hypervisor],
8265                              constants.HVS_PARAMETER_TYPES)
8266       # local check
8267       hypervisor.GetHypervisor(
8268         instance.hypervisor).CheckParameterSyntax(hv_new)
8269       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8270       self.hv_new = hv_new # the new actual values
8271       self.hv_inst = i_hvdict # the new dict (without defaults)
8272     else:
8273       self.hv_new = self.hv_inst = {}
8274
8275     # beparams processing
8276     if self.op.beparams:
8277       i_bedict, be_new = self._GetUpdatedParams(
8278                              instance.beparams, self.op.beparams,
8279                              cluster.beparams[constants.PP_DEFAULT],
8280                              constants.BES_PARAMETER_TYPES)
8281       self.be_new = be_new # the new actual values
8282       self.be_inst = i_bedict # the new dict (without defaults)
8283     else:
8284       self.be_new = self.be_inst = {}
8285
8286     self.warn = []
8287
8288     if constants.BE_MEMORY in self.op.beparams and not self.force:
8289       mem_check_list = [pnode]
8290       if be_new[constants.BE_AUTO_BALANCE]:
8291         # either we changed auto_balance to yes or it was from before
8292         mem_check_list.extend(instance.secondary_nodes)
8293       instance_info = self.rpc.call_instance_info(pnode, instance.name,
8294                                                   instance.hypervisor)
8295       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8296                                          instance.hypervisor)
8297       pninfo = nodeinfo[pnode]
8298       msg = pninfo.fail_msg
8299       if msg:
8300         # Assume the primary node is unreachable and go ahead
8301         self.warn.append("Can't get info from primary node %s: %s" %
8302                          (pnode,  msg))
8303       elif not isinstance(pninfo.payload.get('memory_free', None), int):
8304         self.warn.append("Node data from primary node %s doesn't contain"
8305                          " free memory information" % pnode)
8306       elif instance_info.fail_msg:
8307         self.warn.append("Can't get instance runtime information: %s" %
8308                         instance_info.fail_msg)
8309       else:
8310         if instance_info.payload:
8311           current_mem = int(instance_info.payload['memory'])
8312         else:
8313           # Assume instance not running
8314           # (there is a slight race condition here, but it's not very probable,
8315           # and we have no other way to check)
8316           current_mem = 0
8317         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8318                     pninfo.payload['memory_free'])
8319         if miss_mem > 0:
8320           raise errors.OpPrereqError("This change will prevent the instance"
8321                                      " from starting, due to %d MB of memory"
8322                                      " missing on its primary node" % miss_mem,
8323                                      errors.ECODE_NORES)
8324
8325       if be_new[constants.BE_AUTO_BALANCE]:
8326         for node, nres in nodeinfo.items():
8327           if node not in instance.secondary_nodes:
8328             continue
8329           msg = nres.fail_msg
8330           if msg:
8331             self.warn.append("Can't get info from secondary node %s: %s" %
8332                              (node, msg))
8333           elif not isinstance(nres.payload.get('memory_free', None), int):
8334             self.warn.append("Secondary node %s didn't return free"
8335                              " memory information" % node)
8336           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8337             self.warn.append("Not enough memory to failover instance to"
8338                              " secondary node %s" % node)
8339
8340     # NIC processing
8341     self.nic_pnew = {}
8342     self.nic_pinst = {}
8343     for nic_op, nic_dict in self.op.nics:
8344       if nic_op == constants.DDM_REMOVE:
8345         if not instance.nics:
8346           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8347                                      errors.ECODE_INVAL)
8348         continue
8349       if nic_op != constants.DDM_ADD:
8350         # an existing nic
8351         if not instance.nics:
8352           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8353                                      " no NICs" % nic_op,
8354                                      errors.ECODE_INVAL)
8355         if nic_op < 0 or nic_op >= len(instance.nics):
8356           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8357                                      " are 0 to %d" %
8358                                      (nic_op, len(instance.nics) - 1),
8359                                      errors.ECODE_INVAL)
8360         old_nic_params = instance.nics[nic_op].nicparams
8361         old_nic_ip = instance.nics[nic_op].ip
8362       else:
8363         old_nic_params = {}
8364         old_nic_ip = None
8365
8366       update_params_dict = dict([(key, nic_dict[key])
8367                                  for key in constants.NICS_PARAMETERS
8368                                  if key in nic_dict])
8369
8370       if 'bridge' in nic_dict:
8371         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8372
8373       new_nic_params, new_filled_nic_params = \
8374           self._GetUpdatedParams(old_nic_params, update_params_dict,
8375                                  cluster.nicparams[constants.PP_DEFAULT],
8376                                  constants.NICS_PARAMETER_TYPES)
8377       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8378       self.nic_pinst[nic_op] = new_nic_params
8379       self.nic_pnew[nic_op] = new_filled_nic_params
8380       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8381
8382       if new_nic_mode == constants.NIC_MODE_BRIDGED:
8383         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8384         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8385         if msg:
8386           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8387           if self.force:
8388             self.warn.append(msg)
8389           else:
8390             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8391       if new_nic_mode == constants.NIC_MODE_ROUTED:
8392         if 'ip' in nic_dict:
8393           nic_ip = nic_dict['ip']
8394         else:
8395           nic_ip = old_nic_ip
8396         if nic_ip is None:
8397           raise errors.OpPrereqError('Cannot set the nic ip to None'
8398                                      ' on a routed nic', errors.ECODE_INVAL)
8399       if 'mac' in nic_dict:
8400         nic_mac = nic_dict['mac']
8401         if nic_mac is None:
8402           raise errors.OpPrereqError('Cannot set the nic mac to None',
8403                                      errors.ECODE_INVAL)
8404         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8405           # otherwise generate the mac
8406           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8407         else:
8408           # or validate/reserve the current one
8409           try:
8410             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8411           except errors.ReservationError:
8412             raise errors.OpPrereqError("MAC address %s already in use"
8413                                        " in cluster" % nic_mac,
8414                                        errors.ECODE_NOTUNIQUE)
8415
8416     # DISK processing
8417     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8418       raise errors.OpPrereqError("Disk operations not supported for"
8419                                  " diskless instances",
8420                                  errors.ECODE_INVAL)
8421     for disk_op, _ in self.op.disks:
8422       if disk_op == constants.DDM_REMOVE:
8423         if len(instance.disks) == 1:
8424           raise errors.OpPrereqError("Cannot remove the last disk of"
8425                                      " an instance", errors.ECODE_INVAL)
8426         _CheckInstanceDown(self, instance, "cannot remove disks")
8427
8428       if (disk_op == constants.DDM_ADD and
8429           len(instance.nics) >= constants.MAX_DISKS):
8430         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8431                                    " add more" % constants.MAX_DISKS,
8432                                    errors.ECODE_STATE)
8433       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8434         # an existing disk
8435         if disk_op < 0 or disk_op >= len(instance.disks):
8436           raise errors.OpPrereqError("Invalid disk index %s, valid values"
8437                                      " are 0 to %d" %
8438                                      (disk_op, len(instance.disks)),
8439                                      errors.ECODE_INVAL)
8440
8441     # OS change
8442     if self.op.os_name and not self.op.force:
8443       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8444                       self.op.force_variant)
8445
8446     return
8447
8448   def _ConvertPlainToDrbd(self, feedback_fn):
8449     """Converts an instance from plain to drbd.
8450
8451     """
8452     feedback_fn("Converting template to drbd")
8453     instance = self.instance
8454     pnode = instance.primary_node
8455     snode = self.op.remote_node
8456
8457     # create a fake disk info for _GenerateDiskTemplate
8458     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8459     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8460                                       instance.name, pnode, [snode],
8461                                       disk_info, None, None, 0)
8462     info = _GetInstanceInfoText(instance)
8463     feedback_fn("Creating aditional volumes...")
8464     # first, create the missing data and meta devices
8465     for disk in new_disks:
8466       # unfortunately this is... not too nice
8467       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8468                             info, True)
8469       for child in disk.children:
8470         _CreateSingleBlockDev(self, snode, instance, child, info, True)
8471     # at this stage, all new LVs have been created, we can rename the
8472     # old ones
8473     feedback_fn("Renaming original volumes...")
8474     rename_list = [(o, n.children[0].logical_id)
8475                    for (o, n) in zip(instance.disks, new_disks)]
8476     result = self.rpc.call_blockdev_rename(pnode, rename_list)
8477     result.Raise("Failed to rename original LVs")
8478
8479     feedback_fn("Initializing DRBD devices...")
8480     # all child devices are in place, we can now create the DRBD devices
8481     for disk in new_disks:
8482       for node in [pnode, snode]:
8483         f_create = node == pnode
8484         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8485
8486     # at this point, the instance has been modified
8487     instance.disk_template = constants.DT_DRBD8
8488     instance.disks = new_disks
8489     self.cfg.Update(instance, feedback_fn)
8490
8491     # disks are created, waiting for sync
8492     disk_abort = not _WaitForSync(self, instance)
8493     if disk_abort:
8494       raise errors.OpExecError("There are some degraded disks for"
8495                                " this instance, please cleanup manually")
8496
8497   def _ConvertDrbdToPlain(self, feedback_fn):
8498     """Converts an instance from drbd to plain.
8499
8500     """
8501     instance = self.instance
8502     assert len(instance.secondary_nodes) == 1
8503     pnode = instance.primary_node
8504     snode = instance.secondary_nodes[0]
8505     feedback_fn("Converting template to plain")
8506
8507     old_disks = instance.disks
8508     new_disks = [d.children[0] for d in old_disks]
8509
8510     # copy over size and mode
8511     for parent, child in zip(old_disks, new_disks):
8512       child.size = parent.size
8513       child.mode = parent.mode
8514
8515     # update instance structure
8516     instance.disks = new_disks
8517     instance.disk_template = constants.DT_PLAIN
8518     self.cfg.Update(instance, feedback_fn)
8519
8520     feedback_fn("Removing volumes on the secondary node...")
8521     for disk in old_disks:
8522       self.cfg.SetDiskID(disk, snode)
8523       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8524       if msg:
8525         self.LogWarning("Could not remove block device %s on node %s,"
8526                         " continuing anyway: %s", disk.iv_name, snode, msg)
8527
8528     feedback_fn("Removing unneeded volumes on the primary node...")
8529     for idx, disk in enumerate(old_disks):
8530       meta = disk.children[1]
8531       self.cfg.SetDiskID(meta, pnode)
8532       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8533       if msg:
8534         self.LogWarning("Could not remove metadata for disk %d on node %s,"
8535                         " continuing anyway: %s", idx, pnode, msg)
8536
8537
8538   def Exec(self, feedback_fn):
8539     """Modifies an instance.
8540
8541     All parameters take effect only at the next restart of the instance.
8542
8543     """
8544     # Process here the warnings from CheckPrereq, as we don't have a
8545     # feedback_fn there.
8546     for warn in self.warn:
8547       feedback_fn("WARNING: %s" % warn)
8548
8549     result = []
8550     instance = self.instance
8551     # disk changes
8552     for disk_op, disk_dict in self.op.disks:
8553       if disk_op == constants.DDM_REMOVE:
8554         # remove the last disk
8555         device = instance.disks.pop()
8556         device_idx = len(instance.disks)
8557         for node, disk in device.ComputeNodeTree(instance.primary_node):
8558           self.cfg.SetDiskID(disk, node)
8559           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8560           if msg:
8561             self.LogWarning("Could not remove disk/%d on node %s: %s,"
8562                             " continuing anyway", device_idx, node, msg)
8563         result.append(("disk/%d" % device_idx, "remove"))
8564       elif disk_op == constants.DDM_ADD:
8565         # add a new disk
8566         if instance.disk_template == constants.DT_FILE:
8567           file_driver, file_path = instance.disks[0].logical_id
8568           file_path = os.path.dirname(file_path)
8569         else:
8570           file_driver = file_path = None
8571         disk_idx_base = len(instance.disks)
8572         new_disk = _GenerateDiskTemplate(self,
8573                                          instance.disk_template,
8574                                          instance.name, instance.primary_node,
8575                                          instance.secondary_nodes,
8576                                          [disk_dict],
8577                                          file_path,
8578                                          file_driver,
8579                                          disk_idx_base)[0]
8580         instance.disks.append(new_disk)
8581         info = _GetInstanceInfoText(instance)
8582
8583         logging.info("Creating volume %s for instance %s",
8584                      new_disk.iv_name, instance.name)
8585         # Note: this needs to be kept in sync with _CreateDisks
8586         #HARDCODE
8587         for node in instance.all_nodes:
8588           f_create = node == instance.primary_node
8589           try:
8590             _CreateBlockDev(self, node, instance, new_disk,
8591                             f_create, info, f_create)
8592           except errors.OpExecError, err:
8593             self.LogWarning("Failed to create volume %s (%s) on"
8594                             " node %s: %s",
8595                             new_disk.iv_name, new_disk, node, err)
8596         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8597                        (new_disk.size, new_disk.mode)))
8598       else:
8599         # change a given disk
8600         instance.disks[disk_op].mode = disk_dict['mode']
8601         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8602
8603     if self.op.disk_template:
8604       r_shut = _ShutdownInstanceDisks(self, instance)
8605       if not r_shut:
8606         raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8607                                  " proceed with disk template conversion")
8608       mode = (instance.disk_template, self.op.disk_template)
8609       try:
8610         self._DISK_CONVERSIONS[mode](self, feedback_fn)
8611       except:
8612         self.cfg.ReleaseDRBDMinors(instance.name)
8613         raise
8614       result.append(("disk_template", self.op.disk_template))
8615
8616     # NIC changes
8617     for nic_op, nic_dict in self.op.nics:
8618       if nic_op == constants.DDM_REMOVE:
8619         # remove the last nic
8620         del instance.nics[-1]
8621         result.append(("nic.%d" % len(instance.nics), "remove"))
8622       elif nic_op == constants.DDM_ADD:
8623         # mac and bridge should be set, by now
8624         mac = nic_dict['mac']
8625         ip = nic_dict.get('ip', None)
8626         nicparams = self.nic_pinst[constants.DDM_ADD]
8627         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8628         instance.nics.append(new_nic)
8629         result.append(("nic.%d" % (len(instance.nics) - 1),
8630                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
8631                        (new_nic.mac, new_nic.ip,
8632                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8633                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8634                        )))
8635       else:
8636         for key in 'mac', 'ip':
8637           if key in nic_dict:
8638             setattr(instance.nics[nic_op], key, nic_dict[key])
8639         if nic_op in self.nic_pinst:
8640           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8641         for key, val in nic_dict.iteritems():
8642           result.append(("nic.%s/%d" % (key, nic_op), val))
8643
8644     # hvparams changes
8645     if self.op.hvparams:
8646       instance.hvparams = self.hv_inst
8647       for key, val in self.op.hvparams.iteritems():
8648         result.append(("hv/%s" % key, val))
8649
8650     # beparams changes
8651     if self.op.beparams:
8652       instance.beparams = self.be_inst
8653       for key, val in self.op.beparams.iteritems():
8654         result.append(("be/%s" % key, val))
8655
8656     # OS change
8657     if self.op.os_name:
8658       instance.os = self.op.os_name
8659
8660     self.cfg.Update(instance, feedback_fn)
8661
8662     return result
8663
  # Dispatch table for disk template conversions. The key is a
  # (current disk template, requested disk template) pair and the value is
  # the conversion routine; the lookup is done with
  # (instance.disk_template, self.op.disk_template) and the routine is then
  # called with (self, feedback_fn).
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
8668
class LUQueryExports(NoHooksLU):
  """Logical unit listing the exports present on a set of nodes.

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the (shared) node locks needed for the query.

    If the opcode names no nodes, all nodes are locked; otherwise only
    the requested ones, as expanded by _GetWantedNodes.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      node_locks = _GetWantedNodes(self, self.op.nodes)
    else:
      node_locks = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def CheckPrereq(self):
    """Remember the nodes whose locks were actually acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Collect the export list of every queried node.

    @rtype: dict
    @return: a dictionary mapping each node to its export list (the
        payload of the export_list RPC), or to False if the RPC to
        that node failed

    """
    answers = self.rpc.call_export_list(self.nodes)
    per_node = {}
    for name in answers:
      reply = answers[name]
      if reply.fail_msg:
        # the node could not be queried
        per_node[name] = False
        continue
      per_node[name] = reply.payload

    return per_node
8709
8710
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  The disks of the instance are snapshotted on its primary node, the
  snapshots are exported to the target node and the export is finalized
  there. Afterwards, exports of the same instance found on the other
  nodes are removed.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Stores the shutdown timeout, falling back to
    constants.DEFAULT_SHUTDOWN_TIMEOUT if the opcode has no
    C{shutdown_timeout} attribute.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Lock the instance and all the nodes.

    """
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    @return: a tuple of (environment dict, pre-hook node list, post-hook
        node list); the same node list is used for both hook phases

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid, that the
    primary node and the target node are online, that the target node
    is not drained and that none of the instance's disks is file-based.

    @raise errors.OpPrereqError: if the instance has file-based disks
        (the node checks are delegated to the _CheckNode* helpers)

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    # replace the target node name in the opcode with its expanded form
    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
    assert self.dst_node is not None

    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    Failures to snapshot or export a single disk, to remove a snapshot,
    to finalize the export or to remove an older export are only logged
    as warnings; failures to shut down or to restart the instance abort
    the operation.

    @param feedback_fn: function used to send progress messages
    @rtype: tuple
    @return: (fin_resu, dresults), where fin_resu tells whether
        finalizing the export succeeded and dresults is a list with
        one boolean per instance disk telling whether that disk was
        exported successfully
    @raise errors.OpExecError: if the instance cannot be started again
        after taking the snapshots

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    # one entry per instance disk: the snapshot's Disk object, or False if
    # the snapshot could not be created
    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      # per-disk results
      dresults = []
      try:
        for idx, disk in enumerate(instance.disks):
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
                      (idx, src_node))

          # result.payload will be a snapshot of an lvm leaf of the one we
          # passed
          result = self.rpc.call_blockdev_snapshot(src_node, disk)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                            idx, src_node, msg)
            snap_disks.append(False)
          else:
            disk_id = (vgname, result.payload)
            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                   logical_id=disk_id, physical_id=disk_id,
                                   iv_name=disk.iv_name)
            snap_disks.append(new_dev)

      finally:
        # restart the instance (whether or not snapshotting succeeded) if we
        # were asked to shut it down and it is marked as up in the
        # configuration
        if self.op.shutdown and instance.admin_up:
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

      # TODO: check for size

      cluster_name = self.cfg.GetClusterName()
      for idx, dev in enumerate(snap_disks):
        feedback_fn("Exporting snapshot %s from %s to %s" %
                    (idx, src_node, dst_node.name))
        if dev:
          # FIXME: pass debug from opcode to backend
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                 instance, cluster_name,
                                                 idx, self.op.debug_level)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not export disk/%s from node %s to"
                            " node %s: %s", idx, src_node, dst_node.name, msg)
            dresults.append(False)
          else:
            dresults.append(True)
          # the snapshot is removed whether or not the export succeeded
          msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
          if msg:
            self.LogWarning("Could not remove snapshot for disk/%d from node"
                            " %s: %s", idx, src_node, msg)
        else:
          # no snapshot was created for this disk
          dresults.append(False)

      feedback_fn("Finalizing export on %s" % dst_node.name)
      result = self.rpc.call_finalize_export(dst_node.name, instance,
                                             snap_disks)
      fin_resu = True
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not finalize export for instance %s"
                        " on node %s: %s", instance.name, dst_node.name, msg)
        fin_resu = False

    finally:
      # undo the disk activation done above for a stopped instance
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    # all nodes except the one holding the export we have just created
    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        # nodes which could not be queried are silently skipped
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
    return fin_resu, dresults
8907
8908
class LURemoveExport(NoHooksLU):
  """Logical unit deleting every export of a given instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks of all nodes.

    """
    self.needed_locks = {}
    # All nodes must be locked for the removal to work. The instance itself
    # is left unlocked: nothing happens to it, and exports can be removed
    # for an instance which no longer exists.
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites (there are none).

    """

  def Exec(self, feedback_fn):
    """Remove the instance's exports from all the locked nodes.

    Nodes which cannot be queried are skipped with a warning, and a
    failed removal is only logged.

    """
    requested = self.op.instance_name
    instance_name = self.cfg.ExpandInstanceName(requested)
    # An unknown instance is looked up under the name given by the user;
    # that can only match if the name was already an FQDN.
    fqdn_warn = not instance_name
    if fqdn_warn:
      instance_name = requested

    nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(nodes)
    found = False
    for node in exportlist:
      answer = exportlist[node]
      query_msg = answer.fail_msg
      if query_msg:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_msg)
        continue
      if instance_name not in answer.payload:
        continue
      found = True
      remove_msg = self.rpc.call_export_remove(node, instance_name).fail_msg
      if remove_msg:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node, remove_msg)

    if not found and fqdn_warn:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
8960
8961
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Common base for the tag-handling LUs.

  This abstract class takes care of locking and of looking up the
  object (cluster, node or instance) the tags belong to.

  """

  def ExpandNames(self):
    """Expand the object name and lock the object, for nodes and instances.

    """
    self.needed_locks = {}
    kind = self.op.kind
    if kind == constants.TAG_NODE:
      full_name = _ExpandNodeName(self.cfg, self.op.name)
      self.op.name = full_name
      self.needed_locks[locking.LEVEL_NODE] = full_name
    elif kind == constants.TAG_INSTANCE:
      full_name = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = full_name
      self.needed_locks[locking.LEVEL_INSTANCE] = full_name

  def CheckPrereq(self):
    """Check prerequisites.

    This stores in self.target the configuration object matching the
    requested tag kind, and rejects unknown kinds.

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
    self.target = target
8991
8992
class LUGetTags(TagsLU):
  """Logical unit returning the tags of one object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Return the tags of the target object as a list.

    """
    tags = self.target.GetTags()
    return list(tags)
9005
9006
9007 class LUSearchTags(NoHooksLU):
9008   """Searches the tags for a given pattern.
9009
9010   """
9011   _OP_REQP = ["pattern"]
9012   REQ_BGL = False
9013
9014   def ExpandNames(self):
9015     self.needed_locks = {}
9016
9017   def CheckPrereq(self):
9018     """Check prerequisites.
9019
9020     This checks the pattern passed for validity by compiling it.
9021
9022     """
9023     try:
9024       self.re = re.compile(self.op.pattern)
9025     except re.error, err:
9026       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9027                                  (self.op.pattern, err), errors.ECODE_INVAL)
9028
9029   def Exec(self, feedback_fn):
9030     """Returns the tag list.
9031
9032     """
9033     cfg = self.cfg
9034     tgts = [("/cluster", cfg.GetClusterInfo())]
9035     ilist = cfg.GetAllInstancesInfo().values()
9036     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9037     nlist = cfg.GetAllNodesInfo().values()
9038     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9039     results = []
9040     for path, target in tgts:
9041       for tag in target.GetTags():
9042         if self.re.search(tag):
9043           results.append((path, tag))
9044     return results
9045
9046
9047 class LUAddTags(TagsLU):
9048   """Sets a tag on a given object.
9049
9050   """
9051   _OP_REQP = ["kind", "name", "tags"]
9052   REQ_BGL = False
9053
9054   def CheckPrereq(self):
9055     """Check prerequisites.
9056
9057     This checks the type and length of the tag name and value.
9058
9059     """
9060     TagsLU.CheckPrereq(self)
9061     for tag in self.op.tags:
9062       objects.TaggableObject.ValidateTag(tag)
9063
9064   def Exec(self, feedback_fn):
9065     """Sets the tag.
9066
9067     """
9068     try:
9069       for tag in self.op.tags:
9070         self.target.AddTag(tag)
9071     except errors.TagError, err:
9072       raise errors.OpExecError("Error while setting tag: %s" % str(err))
9073     self.cfg.Update(self.target, feedback_fn)
9074
9075
class LUDelTags(TagsLU):
  """Logical unit removing a list of tags from one object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Besides validating the tags, this verifies that the target object
    currently has every tag which is to be removed.

    """
    TagsLU.CheckPrereq(self)
    for candidate in self.op.tags:
      objects.TaggableObject.ValidateTag(candidate)

    wanted = frozenset(self.op.tags)
    present = self.target.GetTags()
    if wanted <= present:
      # everything we were asked to remove is there
      return

    missing = sorted(["'%s'" % name for name in wanted - present])
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (",".join(missing)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the target and save it in the configuration.

    """
    obsolete = self.op.tags
    for candidate in obsolete:
      self.target.RemoveTag(candidate)
    self.cfg.Update(self.target, feedback_fn)
9108
9109
class LUTestDelay(NoHooksLU):
  """Logical unit which simply waits.

  The delay can take place on the master, on a list of nodes, or on
  both.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    If nodes were given, their names are expanded and the nodes locked.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return
    # Using _GetWantedNodes from ExpandNames works here, but is not
    # appropriate in every LU; the LogicalUnit.ExpandNames docstring has
    # more information.
    wanted = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = wanted
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites (there are none).

    """

  def Exec(self, feedback_fn):
    """Sleep on the master and/or on the requested nodes.

    """
    if self.op.on_master and not utils.TestDelay(self.op.duration):
      raise errors.OpExecError("Error during master delay test")

    if self.op.on_nodes:
      replies = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, reply in replies.items():
        reply.Raise("Failure during rpc call to node %s" % node)
9150
9151
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg and rpc, which are needed to query the cluster
    - input data (all members of the key list matching the mode, i.e.
      _ALLO_KEYS, _RELO_KEYS or _EVAC_KEYS, are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes

  # keyword arguments required (and allowed) in allocation mode
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  # keyword arguments required (and allowed) in relocation mode
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  # keyword arguments required (and allowed) in multi-evacuation mode
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    """Initialize the object and build the allocator input.

    @param cfg: the configuration object used to query the cluster
    @param rpc: the RPC runner used to query the nodes
    @param mode: one of the constants.IALLOCATOR_MODE_* values handled
        below (ALLOC, RELOC, MEVAC)
    @param kwargs: the input fields for the given mode; exactly the keys
        of the key list matching the mode must be passed
    @raise errors.ProgrammerError: for an unknown mode, or for invalid
        or missing input parameters

    """
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    # select the accepted keys and the request builder for the mode
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    # no unknown parameters are accepted...
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    # ... and all the known ones must be present
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation. The
    result (cluster-level values plus the "nodes" and "instances"
    dictionaries) is stored in self.in_data.

    @raise errors.OpExecError: if a node which is neither offline nor
        drained doesn't return one of the expected attributes, or
        returns a non-integer value for it

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    # pairs of (instance object, result of FillBE for that instance)
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    # the hypervisor used for the node_info query depends on the mode
    # (__init__ has already rejected any other mode)
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      # the RPC results are only used for nodes which are neither offline
      # nor drained; the other nodes get the static values only
      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        # note: this loop rebinds the name iinfo, which was the instance list
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            # an instance not reported by the node counts as using no memory
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            # memory which is configured for the instance but not currently
            # used by it is subtracted from the node's free memory (the RPC
            # payload is modified in place)
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # dynamic values, based on the node's reply and on the instance
        # memory totals computed above
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        # the NIC's own parameters on top of the cluster defaults
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        # bridged NICs additionally export their link as "bridge"
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    @rtype: dict
    @return: the allocation request; as a side effect,
        self.required_nodes is set to 2 for disk templates in
        constants.DTS_NET_MIRROR and to 1 otherwise

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    @rtype: dict
    @return: the relocation request
    @raise errors.ProgrammerError: if the instance is unknown
    @raise errors.OpPrereqError: if the instance's disk template is not
        in constants.DTS_NET_MIRROR or if the instance doesn't have
        exactly one secondary node

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    @rtype: dict
    @return: the request, containing only the nodes to be evacuated

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    The cluster data is computed, the mode-specific request is added to
    it and the result is serialized into self.in_text.

    @param fn: the function computing the request dictionary for the
        current mode

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    The output of the script is stored in self.out_text; nothing is
    returned directly.

    @param name: the name of the allocator, as passed to the runner
    @param validate: whether to parse and validate the output via
        _ValidateResult
    @param call_fn: the function used to run the allocator; if None,
        self.rpc.call_iallocator_runner is used. It is called with the
        master node name, the allocator name and the input text.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    @raise errors.OpExecError: if the output can't be parsed, is not a
        dictionary, lacks one of the "success", "info" and "result" keys
        or has a "result" value which is not a list

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    # (a "nodes" key is accepted in place of "result")
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    # the values are also stored as attributes of the same name
    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
9478
9479
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests: depending on the requested direction
  it returns either the input text generated for the allocator or the
  raw (unvalidated) output obtained by running the named allocator.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    For the allocation mode, the instance-description attributes must be
    present, the name must not expand to an existing instance, and the
    C{nics} and C{disks} parameters must be lists of suitably-keyed
    dicts; a missing hypervisor defaults to the cluster one. For the
    relocation mode, the name is expanded and the instance's secondary
    nodes are remembered in C{self.relocate_from}. For the multi-evacuate
    mode only the presence of C{evac_nodes} is checked.

    @raise errors.OpPrereqError: if a required attribute is missing or
        invalid, or if the mode or direction is unknown

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # an allocation test must use a name not yet known to the config
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      # NOTE(review): _ExpandInstanceName is defined elsewhere in this file;
      # presumably it raises if the instance is unknown - confirm
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # the allocator name is only needed when the script is actually run
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: feedback function (not used by this LU)
    @return: for the "in" direction, the generated allocator input text;
        otherwise the raw output text of the allocator, which is run
        without result validation
    @raise errors.ProgrammerError: if the mode is not one of the modes
        handled here

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       # pass a copy of the list computed in CheckPrereq
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      # interpolate the mode into the message; passing it as a second
      # exception argument would leave a literal "%s" in the text
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result