4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
38 from ganeti import ssh
39 from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import serializer
46 from ganeti import ssconf
49 class LogicalUnit(object):
50 """Logical Unit base class.
52 Subclasses must follow these rules:
53 - implement ExpandNames
54 - implement CheckPrereq (except when tasklets are used)
55 - implement Exec (except when tasklets are used)
56 - implement BuildHooksEnv
57 - redefine HPATH and HTYPE
58 - optionally redefine their run requirements:
59 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
61 Note that all commands require root permissions.
63 @ivar dry_run_result: the value (if any) that will be returned to the caller
64 in dry-run mode (signalled by opcode dry_run parameter)
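# Illustrative sketch only (not part of the original module): a minimal LU
# following the rules above; the opcode name, hook path and lock choices are
# assumptions made for the example.
#
#   class LUExampleNoop(LogicalUnit):
#     HPATH = "example-noop"
#     HTYPE = constants.HTYPE_CLUSTER
#     _OP_REQP = []
#
#     def ExpandNames(self):
#       self.needed_locks = {}
#
#     def BuildHooksEnv(self):
#       env = {"OP_TARGET": self.cfg.GetClusterName()}
#       return env, [], [self.cfg.GetMasterNode()]
#
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("doing nothing")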
72 def __init__(self, processor, op, context, rpc):
73 """Constructor for LogicalUnit.
This needs to be overridden in derived classes in order to check op
validity.
81 self.cfg = context.cfg
82 self.context = context
84 # Dicts used to declare locking needs to mcpu
85 self.needed_locks = None
86 self.acquired_locks = {}
87 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
89 self.remove_locks = {}
90 # Used to force good behavior when calling helper functions
91 self.recalculate_locks = {}
94 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
95 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
96 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
98 self.dry_run_result = None
99 # support for generic debug attribute
100 if (not hasattr(self.op, "debug_level") or
101 not isinstance(self.op.debug_level, int)):
102 self.op.debug_level = 0
for attr_name in self._OP_REQP:
  attr_val = getattr(op, attr_name, None)
  if attr_val is None:
    raise errors.OpPrereqError("Required parameter '%s' missing" %
                               attr_name, errors.ECODE_INVAL)
113 self.CheckArguments()
116 """Returns the SshRunner object
120 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
123 ssh = property(fget=__GetSSH)
125 def CheckArguments(self):
126 """Check syntactic validity for the opcode arguments.
This method is for doing a simple syntactic check and ensuring the
129 validity of opcode parameters, without any cluster-related
130 checks. While the same can be accomplished in ExpandNames and/or
CheckPrereq, doing these separately is better because:
- ExpandNames is left as a purely lock-related function
- CheckPrereq is run after we have acquired locks (and possibly
  waited for them)
137 The function is allowed to change the self.op attribute so that
later methods need no longer worry about missing parameters.
143 def ExpandNames(self):
144 """Expand names for this LU.
146 This method is called before starting to execute the opcode, and it should
147 update all the parameters of the opcode to their canonical form (e.g. a
148 short node name must be fully expanded after this method has successfully
completed). This way locking, hooks, logging, etc. can work correctly.
LUs which implement this method must also populate the self.needed_locks
member, a dict with lock levels as keys and lists of the needed lock
names as values. Rules:
155 - use an empty dict if you don't need any lock
156 - if you don't need any lock at a particular level omit that level
157 - don't put anything for the BGL level
158 - if you want all locks at a level use locking.ALL_SET as a value
160 If you need to share locks (rather than acquire them exclusively) at one
161 level you can modify self.share_locks, setting a true value (usually 1) for
162 that level. By default locks are not shared.
164 This function can also define a list of tasklets, which then will be
165 executed in order instead of the usual LU-level CheckPrereq and Exec
166 functions, if those are not defined by the LU.
# Acquire all nodes and one instance
self.needed_locks = {
  locking.LEVEL_NODE: locking.ALL_SET,
  locking.LEVEL_INSTANCE: ['instance1.example.tld'],
}
# Acquire just two nodes
self.needed_locks = {
  locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
}
# Acquire no locks
self.needed_locks = {} # No, you can't leave it to the default value None
# The implementation of this method is mandatory only if the new LU is
# concurrent, so that old LUs don't need to be changed all at the same
# time.
if self.REQ_BGL:
  self.needed_locks = {} # Exclusive LUs don't need locks.
else:
  raise NotImplementedError
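# Illustrative sketch only: an ExpandNames override that acquires all node
# locks in shared mode, using the lock levels and share_locks dict described
# above.
#
#   def ExpandNames(self):
#     self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
#     self.share_locks[locking.LEVEL_NODE] = 1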
191 def DeclareLocks(self, level):
192 """Declare LU locking needs for a level
194 While most LUs can just declare their locking needs at ExpandNames time,
195 sometimes there's the need to calculate some locks after having acquired
196 the ones before. This function is called just before acquiring locks at a
197 particular level, but after acquiring the ones at lower levels, and permits
198 such calculations. It can be used to modify self.needed_locks, and by
199 default it does nothing.
201 This function is only called if you have something already set in
202 self.needed_locks for the level.
204 @param level: Locking level which is going to be locked
205 @type level: member of ganeti.locking.LEVELS
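# Illustrative sketch only: a DeclareLocks implementation that computes the
# node locks from the instance locks acquired at the previous level (see also
# _LockInstancesNodes below).
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()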
209 def CheckPrereq(self):
210 """Check prerequisites for this LU.
212 This method should check that the prerequisites for the execution
of this LU are fulfilled. It can do internode communication, but
it should be idempotent - no cluster or system changes are allowed.
217 The method should raise errors.OpPrereqError in case something is
218 not fulfilled. Its return value is ignored.
220 This method should also update all the parameters of the opcode to
221 their canonical form if it hasn't been done by ExpandNames before.
if self.tasklets is not None:
  for (idx, tl) in enumerate(self.tasklets):
    logging.debug("Checking prerequisites for tasklet %s/%s",
                  idx + 1, len(self.tasklets))
    tl.CheckPrereq()
else:
  raise NotImplementedError
232 def Exec(self, feedback_fn):
"""Execute the LU.

This method should implement the actual work. It should raise
errors.OpExecError for failures that are somewhat dealt with in
code, or expected.
if self.tasklets is not None:
  for (idx, tl) in enumerate(self.tasklets):
    logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
    tl.Exec(feedback_fn)
else:
  raise NotImplementedError
247 def BuildHooksEnv(self):
248 """Build hooks environment for this LU.
This method should return a three-element tuple consisting of: a dict
251 containing the environment that will be used for running the
252 specific hook for this LU, a list of node names on which the hook
253 should run before the execution, and a list of node names on which
254 the hook should run after the execution.
The keys of the dict must not be prefixed with 'GANETI_', as this will
be handled in the hooks runner. Also note additional keys will be
258 added by the hooks runner. If the LU doesn't define any
259 environment, an empty dict (and not None) should be returned.
If no nodes are needed, an empty list (and not None) should be returned.
Note that if the HPATH for a LU class is None, this function will
not be called.

"""
raise NotImplementedError
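# Illustrative sketch only: a BuildHooksEnv returning the three-element tuple
# described above, for a hypothetical instance-level LU that has already
# looked up self.instance.
#
#   def BuildHooksEnv(self):
#     env = _BuildInstanceHookEnvByObject(self, self.instance)
#     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
#     return env, nl, nl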
269 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
270 """Notify the LU about the results of its hooks.
272 This method is called every time a hooks phase is executed, and notifies
273 the Logical Unit about the hooks' result. The LU can then use it to alter
274 its result based on the hooks. By default the method does nothing and the
275 previous result is passed back unchanged but any LU can define it if it
276 wants to use the local cluster hook-scripts somehow.
278 @param phase: one of L{constants.HOOKS_PHASE_POST} or
279 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
280 @param hook_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
282 @param lu_result: the previous Exec result this LU had, or None
284 @return: the new Exec result, based on the previous result
# API must be kept, thus we ignore the unused-argument and
# "could be a function" warnings
# pylint: disable-msg=W0613,R0201
return lu_result
293 def _ExpandAndLockInstance(self):
294 """Helper function to expand and lock an instance.
296 Many LUs that work on an instance take its name in self.op.instance_name
297 and need to expand it and then declare the expanded name for locking. This
298 function does it, and then updates self.op.instance_name to the expanded
name. It also initializes needed_locks as a dict, if this hasn't been done
before.
303 if self.needed_locks is None:
304 self.needed_locks = {}
306 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
307 "_ExpandAndLockInstance called with instance-level locks set"
308 self.op.instance_name = _ExpandInstanceName(self.cfg,
309 self.op.instance_name)
310 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
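# Illustrative sketch only: typical use of _ExpandAndLockInstance from an
# instance LU's ExpandNames, together with a deferred node-lock declaration
# (the recalculate_locks pattern also appears in LURepairDiskSizes below).
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE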
312 def _LockInstancesNodes(self, primary_only=False):
313 """Helper function to declare instances' nodes for locking.
315 This function should be called after locking one or more instances to lock
316 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
317 with all primary or secondary nodes for instances already locked and
318 present in self.needed_locks[locking.LEVEL_INSTANCE].
320 It should be called from DeclareLocks, and for safety only works if
321 self.recalculate_locks[locking.LEVEL_NODE] is set.
323 In the future it may grow parameters to just lock some instance's nodes, or
324 to just lock primaries or secondary nodes, if needed.
It should be called in DeclareLocks in a way similar to::
328 if level == locking.LEVEL_NODE:
329 self._LockInstancesNodes()
331 @type primary_only: boolean
332 @param primary_only: only lock primary nodes of locked instances
335 assert locking.LEVEL_NODE in self.recalculate_locks, \
336 "_LockInstancesNodes helper function called with no nodes to recalculate"
# TODO: check if we've really been called with the instance locks held
340 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
341 # future we might want to have different behaviors depending on the value
342 # of self.recalculate_locks[locking.LEVEL_NODE]
wanted_nodes = []
for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
  instance = self.context.cfg.GetInstanceInfo(instance_name)
  wanted_nodes.append(instance.primary_node)
  if not primary_only:
    wanted_nodes.extend(instance.secondary_nodes)
350 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
351 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
352 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
353 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
355 del self.recalculate_locks[locking.LEVEL_NODE]
358 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
359 """Simple LU which runs no hooks.
361 This LU is intended as a parent for other LogicalUnits which will
362 run no hooks, in order to reduce duplicate code.
368 def BuildHooksEnv(self):
369 """Empty BuildHooksEnv for NoHooksLu.
371 This just raises an error.
374 assert False, "BuildHooksEnv called for NoHooksLUs"
378 """Tasklet base class.
380 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
381 they can mix legacy code with tasklets. Locking needs to be done in the LU,
382 tasklets know nothing about locks.
384 Subclasses must follow these rules:
- Implement CheckPrereq
- Implement Exec (see the sketch after this class)
def __init__(self, lu):
  self.lu = lu

  # Shortcuts
  self.cfg = lu.cfg
  self.rpc = lu.rpc
396 def CheckPrereq(self):
397 """Check prerequisites for this tasklets.
399 This method should check whether the prerequisites for the execution of
400 this tasklet are fulfilled. It can do internode communication, but it
401 should be idempotent - no cluster or system changes are allowed.
403 The method should raise errors.OpPrereqError in case something is not
404 fulfilled. Its return value is ignored.
406 This method should also update all parameters to their canonical form if it
407 hasn't been done before.
410 raise NotImplementedError
412 def Exec(self, feedback_fn):
413 """Execute the tasklet.
415 This method should implement the actual work. It should raise
errors.OpExecError for failures that are somewhat dealt with in code, or
expected.
420 raise NotImplementedError
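# Illustrative sketch only (referenced from the rules above): a minimal
# tasklet; the instance lookup and the _ExampleTasklet name are assumptions
# made for the example.
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("working on %s" % self.instance.name)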
423 def _GetWantedNodes(lu, nodes):
424 """Returns list of checked and expanded node names.
426 @type lu: L{LogicalUnit}
427 @param lu: the logical unit on whose behalf we execute
429 @param nodes: list of node names or None for all nodes
431 @return: the list of nodes, sorted
432 @raise errors.ProgrammerError: if the nodes parameter is wrong type
if not isinstance(nodes, list):
  raise errors.OpPrereqError("Invalid argument type 'nodes'",
                             errors.ECODE_INVAL)

if not nodes:
  raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
                               " non-empty list of nodes whose name is to be"
                               " expanded.")
443 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
444 return utils.NiceSort(wanted)
447 def _GetWantedInstances(lu, instances):
448 """Returns list of checked and expanded instance names.
450 @type lu: L{LogicalUnit}
451 @param lu: the logical unit on whose behalf we execute
452 @type instances: list
453 @param instances: list of instance names or None for all instances
455 @return: the list of instances, sorted
456 @raise errors.OpPrereqError: if the instances parameter is wrong type
457 @raise errors.OpPrereqError: if any of the passed instances is not found
if not isinstance(instances, list):
  raise errors.OpPrereqError("Invalid argument type 'instances'",
                             errors.ECODE_INVAL)

if instances:
  wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
else:
  wanted = utils.NiceSort(lu.cfg.GetInstanceList())
return wanted
471 def _CheckOutputFields(static, dynamic, selected):
472 """Checks whether all selected fields are valid.
474 @type static: L{utils.FieldSet}
475 @param static: static fields set
476 @type dynamic: L{utils.FieldSet}
477 @param dynamic: dynamic fields set
f = utils.FieldSet()
f.Extend(static)
f.Extend(dynamic)

delta = f.NonMatching(selected)
if delta:
  raise errors.OpPrereqError("Unknown output fields selected: %s"
                             % ",".join(delta), errors.ECODE_INVAL)
490 def _CheckBooleanOpField(op, name):
491 """Validates boolean opcode parameters.
493 This will ensure that an opcode parameter is either a boolean value,
494 or None (but that it always exists).
497 val = getattr(op, name, None)
498 if not (val is None or isinstance(val, bool)):
499 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
500 (name, str(val)), errors.ECODE_INVAL)
501 setattr(op, name, val)
504 def _CheckGlobalHvParams(params):
505 """Validates that given hypervisor params are not global ones.
This will ensure that instances don't get customised versions of
global parameters.

"""
used_globals = constants.HVC_GLOBALS.intersection(params)
if used_globals:
  msg = ("The following hypervisor parameters are global and cannot"
         " be customized at instance level, please modify them at"
         " cluster level: %s" % utils.CommaJoin(used_globals))
  raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
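# Illustrative example (hypothetical parameter names): if
# constants.HVC_GLOBALS contains "migration_port", then passing
# {"migration_port": 8102} here raises OpPrereqError, while purely
# per-instance parameters such as {"kernel_path": "..."} are accepted.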
519 def _CheckNodeOnline(lu, node):
520 """Ensure that a given node is online.
522 @param lu: the LU on behalf of which we make the check
523 @param node: the node to check
524 @raise errors.OpPrereqError: if the node is offline
527 if lu.cfg.GetNodeInfo(node).offline:
528 raise errors.OpPrereqError("Can't use offline node %s" % node,
532 def _CheckNodeNotDrained(lu, node):
533 """Ensure that a given node is not drained.
535 @param lu: the LU on behalf of which we make the check
536 @param node: the node to check
537 @raise errors.OpPrereqError: if the node is drained
540 if lu.cfg.GetNodeInfo(node).drained:
541 raise errors.OpPrereqError("Can't use drained node %s" % node,
545 def _CheckNodeHasOS(lu, node, os_name, force_variant):
546 """Ensure that a node supports a given OS.
548 @param lu: the LU on behalf of which we make the check
549 @param node: the node to check
550 @param os_name: the OS to query about
551 @param force_variant: whether to ignore variant errors
552 @raise errors.OpPrereqError: if the node is not supporting the OS
555 result = lu.rpc.call_os_get(node, os_name)
result.Raise("OS '%s' not in supported OS list for node %s" %
             (os_name, node),
             prereq=True, ecode=errors.ECODE_INVAL)
559 if not force_variant:
560 _CheckOSVariant(result.payload, os_name)
563 def _CheckDiskTemplate(template):
564 """Ensure a given disk template is valid.
567 if template not in constants.DISK_TEMPLATES:
568 msg = ("Invalid disk template name '%s', valid templates are: %s" %
569 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
570 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
573 def _CheckInstanceDown(lu, instance, reason):
574 """Ensure that an instance is not running."""
575 if instance.admin_up:
576 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
577 (instance.name, reason), errors.ECODE_STATE)
579 pnode = instance.primary_node
580 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
581 ins_l.Raise("Can't contact node %s for instance information" % pnode,
582 prereq=True, ecode=errors.ECODE_ENVIRON)
584 if instance.name in ins_l.payload:
585 raise errors.OpPrereqError("Instance %s is running, %s" %
586 (instance.name, reason), errors.ECODE_STATE)
589 def _ExpandItemName(fn, name, kind):
590 """Expand an item name.
592 @param fn: the function to use for expansion
593 @param name: requested item name
594 @param kind: text description ('Node' or 'Instance')
595 @return: the resolved (full) name
596 @raise errors.OpPrereqError: if the item is not found
full_name = fn(name)
if full_name is None:
  raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                             errors.ECODE_NOENT)
606 def _ExpandNodeName(cfg, name):
607 """Wrapper over L{_ExpandItemName} for nodes."""
608 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
611 def _ExpandInstanceName(cfg, name):
612 """Wrapper over L{_ExpandItemName} for instance."""
613 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
616 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
617 memory, vcpus, nics, disk_template, disks,
618 bep, hvp, hypervisor_name):
619 """Builds instance related env variables for hooks
621 This builds the hook environment from individual variables.
624 @param name: the name of the instance
625 @type primary_node: string
626 @param primary_node: the name of the instance's primary node
627 @type secondary_nodes: list
628 @param secondary_nodes: list of secondary nodes as strings
629 @type os_type: string
630 @param os_type: the name of the instance's OS
631 @type status: boolean
632 @param status: the should_run status of the instance
634 @param memory: the memory size of the instance
636 @param vcpus: the count of VCPUs the instance has
638 @param nics: list of tuples (ip, mac, mode, link) representing
639 the NICs the instance has
640 @type disk_template: string
641 @param disk_template: the disk template of the instance
643 @param disks: the list of (size, mode) pairs
645 @param bep: the backend parameters for the instance
647 @param hvp: the hypervisor parameters for the instance
648 @type hypervisor_name: string
649 @param hypervisor_name: the hypervisor for the instance
651 @return: the hook environment for this instance
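# Illustrative sketch only: for a single-NIC, single-disk instance the
# resulting dict contains (among others) keys along these lines; the names
# and values below are assumptions for the example.
#
#   {
#     "INSTANCE_NAME": "instance1.example.tld",
#     "INSTANCE_PRIMARY": "node1.example.tld",
#     "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_NIC0_MAC": "aa:00:00:35:01:02",
#     "INSTANCE_DISK_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 1024,
#   }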
660 "INSTANCE_NAME": name,
661 "INSTANCE_PRIMARY": primary_node,
662 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
663 "INSTANCE_OS_TYPE": os_type,
664 "INSTANCE_STATUS": str_status,
665 "INSTANCE_MEMORY": memory,
666 "INSTANCE_VCPUS": vcpus,
667 "INSTANCE_DISK_TEMPLATE": disk_template,
668 "INSTANCE_HYPERVISOR": hypervisor_name,
672 nic_count = len(nics)
673 for idx, (ip, mac, mode, link) in enumerate(nics):
676 env["INSTANCE_NIC%d_IP" % idx] = ip
677 env["INSTANCE_NIC%d_MAC" % idx] = mac
678 env["INSTANCE_NIC%d_MODE" % idx] = mode
679 env["INSTANCE_NIC%d_LINK" % idx] = link
680 if mode == constants.NIC_MODE_BRIDGED:
681 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
685 env["INSTANCE_NIC_COUNT"] = nic_count
688 disk_count = len(disks)
689 for idx, (size, mode) in enumerate(disks):
690 env["INSTANCE_DISK%d_SIZE" % idx] = size
691 env["INSTANCE_DISK%d_MODE" % idx] = mode
695 env["INSTANCE_DISK_COUNT"] = disk_count
697 for source, kind in [(bep, "BE"), (hvp, "HV")]:
698 for key, value in source.items():
699 env["INSTANCE_%s_%s" % (kind, key)] = value
704 def _NICListToTuple(lu, nics):
705 """Build a list of nic information tuples.
707 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
708 value in LUQueryInstanceData.
710 @type lu: L{LogicalUnit}
711 @param lu: the logical unit on whose behalf we execute
712 @type nics: list of L{objects.NIC}
713 @param nics: list of nics to convert to hooks tuples
hooks_nics = []
c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
for nic in nics:
  ip = nic.ip
  mac = nic.mac
  filled_params = objects.FillDict(c_nicparams, nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
  hooks_nics.append((ip, mac, mode, link))
return hooks_nics
728 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
729 """Builds instance related env variables for hooks from an object.
731 @type lu: L{LogicalUnit}
732 @param lu: the logical unit on whose behalf we execute
733 @type instance: L{objects.Instance}
@param instance: the instance for which we should build the
  environment
@type override: dict
@param override: dictionary with key/values that will override
  our values
@rtype: dict
@return: the hook environment dictionary
743 cluster = lu.cfg.GetClusterInfo()
744 bep = cluster.FillBE(instance)
745 hvp = cluster.FillHV(instance)
args = {
  'name': instance.name,
748 'primary_node': instance.primary_node,
749 'secondary_nodes': instance.secondary_nodes,
750 'os_type': instance.os,
751 'status': instance.admin_up,
752 'memory': bep[constants.BE_MEMORY],
753 'vcpus': bep[constants.BE_VCPUS],
754 'nics': _NICListToTuple(lu, instance.nics),
755 'disk_template': instance.disk_template,
'disks': [(disk.size, disk.mode) for disk in instance.disks],
'bep': bep,
'hvp': hvp,
'hypervisor_name': instance.hypervisor,
}
if override:
  args.update(override)
763 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
766 def _AdjustCandidatePool(lu, exceptions):
767 """Adjust the candidate pool after node operations.
mod_list = lu.cfg.MaintainCandidatePool(exceptions)
if mod_list:
  lu.LogInfo("Promoted nodes to master candidate role: %s",
             utils.CommaJoin(node.name for node in mod_list))
774 for name in mod_list:
775 lu.context.ReaddNode(name)
mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
if mc_now > mc_max:
  lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
             (mc_now, mc_max))
782 def _DecideSelfPromotion(lu, exceptions=None):
783 """Decide whether I should promote myself as a master candidate.
786 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
787 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
788 # the new node will increase mc_max with one, so:
789 mc_should = min(mc_should + 1, cp_size)
790 return mc_now < mc_should
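# Illustrative worked example (assumed numbers): with candidate_pool_size=10
# and GetMasterCandidateStats returning mc_now=3, mc_should=4, the new node
# bumps mc_should to min(4 + 1, 10) = 5, and 3 < 5 means the node should
# promote itself to master candidate.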
793 def _CheckNicsBridgesExist(lu, target_nics, target_node,
794 profile=constants.PP_DEFAULT):
795 """Check that the brigdes needed by a list of nics exist.
798 c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
799 paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
800 for nic in target_nics]
801 brlist = [params[constants.NIC_LINK] for params in paramslist
802 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
804 result = lu.rpc.call_bridges_exist(target_node, brlist)
805 result.Raise("Error checking bridges on destination node '%s'" %
806 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
809 def _CheckInstanceBridgesExist(lu, instance, node=None):
810 """Check that the brigdes needed by an instance exist.
814 node = instance.primary_node
815 _CheckNicsBridgesExist(lu, instance.nics, node)
818 def _CheckOSVariant(os_obj, name):
819 """Check whether an OS name conforms to the os variants specification.
821 @type os_obj: L{objects.OS}
822 @param os_obj: OS object to check
824 @param name: OS name passed by the user, to check for validity
if not os_obj.supported_variants:
  return
try:
  variant = name.split("+", 1)[1]
except IndexError:
  raise errors.OpPrereqError("OS name must include a variant",
                             errors.ECODE_INVAL)

if variant not in os_obj.supported_variants:
  raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
839 def _GetNodeInstancesInner(cfg, fn):
840 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
843 def _GetNodeInstances(cfg, node_name):
844 """Returns a list of all primary and secondary instances on a node.
848 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
851 def _GetNodePrimaryInstances(cfg, node_name):
852 """Returns primary instances on a node.
855 return _GetNodeInstancesInner(cfg,
856 lambda inst: node_name == inst.primary_node)
859 def _GetNodeSecondaryInstances(cfg, node_name):
860 """Returns secondary instances on a node.
863 return _GetNodeInstancesInner(cfg,
864 lambda inst: node_name in inst.secondary_nodes)
867 def _GetStorageTypeArgs(cfg, storage_type):
868 """Returns the arguments for a storage type.
871 # Special case for file storage
872 if storage_type == constants.ST_FILE:
873 # storage.FileStorage wants a list of storage directories
return [[cfg.GetFileStorageDir()]]

return []
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
883 cfg.SetDiskID(dev, node_name)
885 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
886 result.Raise("Failed to get disk status from node %s" % node_name,
887 prereq=prereq, ecode=errors.ECODE_ENVIRON)
889 for idx, bdev_status in enumerate(result.payload):
if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
  faulty.append(idx)

return faulty
896 def _FormatTimestamp(secs):
897 """Formats a Unix timestamp with the local timezone.
900 return time.strftime("%F %T %Z", time.gmtime(secs))
903 class LUPostInitCluster(LogicalUnit):
904 """Logical unit for running hooks after cluster initialization.
907 HPATH = "cluster-init"
908 HTYPE = constants.HTYPE_CLUSTER
911 def BuildHooksEnv(self):
915 env = {"OP_TARGET": self.cfg.GetClusterName()}
916 mn = self.cfg.GetMasterNode()
919 def CheckPrereq(self):
920 """No prerequisites to check.
925 def Exec(self, feedback_fn):
932 class LUDestroyCluster(LogicalUnit):
933 """Logical unit for destroying the cluster.
936 HPATH = "cluster-destroy"
937 HTYPE = constants.HTYPE_CLUSTER
940 def BuildHooksEnv(self):
944 env = {"OP_TARGET": self.cfg.GetClusterName()}
947 def CheckPrereq(self):
948 """Check prerequisites.
950 This checks whether the cluster is empty.
952 Any errors are signaled by raising errors.OpPrereqError.
955 master = self.cfg.GetMasterNode()
957 nodelist = self.cfg.GetNodeList()
958 if len(nodelist) != 1 or nodelist[0] != master:
959 raise errors.OpPrereqError("There are still %d node(s) in"
960 " this cluster." % (len(nodelist) - 1),
962 instancelist = self.cfg.GetInstanceList()
964 raise errors.OpPrereqError("There are still %d instance(s) in"
965 " this cluster." % len(instancelist),
968 def Exec(self, feedback_fn):
969 """Destroys the cluster.
972 master = self.cfg.GetMasterNode()
973 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
975 # Run post hooks on master node before it's removed
976 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
978 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
980 # pylint: disable-msg=W0702
981 self.LogWarning("Errors occurred running hooks on %s" % master)
983 result = self.rpc.call_node_stop_master(master, False)
984 result.Raise("Could not disable the master role")
987 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
988 utils.CreateBackup(priv_key)
989 utils.CreateBackup(pub_key)
994 def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
995 warn_days=constants.SSL_CERT_EXPIRATION_WARN,
996 error_days=constants.SSL_CERT_EXPIRATION_ERROR):
997 """Verifies certificate details for LUVerifyCluster.
1001 msg = "Certificate %s is expired" % filename
1003 if not_before is not None and not_after is not None:
1004 msg += (" (valid from %s to %s)" %
1005 (_FormatTimestamp(not_before),
1006 _FormatTimestamp(not_after)))
1007 elif not_before is not None:
1008 msg += " (valid from %s)" % _FormatTimestamp(not_before)
1009 elif not_after is not None:
1010 msg += " (valid until %s)" % _FormatTimestamp(not_after)
1012 return (LUVerifyCluster.ETYPE_ERROR, msg)
1014 elif not_before is not None and not_before > now:
1015 return (LUVerifyCluster.ETYPE_WARNING,
1016 "Certificate %s not yet valid (valid from %s)" %
1017 (filename, _FormatTimestamp(not_before)))
1019 elif not_after is not None:
1020 remaining_days = int((not_after - now) / (24 * 3600))
1022 msg = ("Certificate %s expires in %d days" % (filename, remaining_days))
1024 if remaining_days <= error_days:
1025 return (LUVerifyCluster.ETYPE_ERROR, msg)
1027 if remaining_days <= warn_days:
return (LUVerifyCluster.ETYPE_WARNING, msg)

return (None, None)
1033 def _VerifyCertificate(filename):
1034 """Verifies a certificate for LUVerifyCluster.
1036 @type filename: string
1037 @param filename: Path to PEM file
1041 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1042 utils.ReadFile(filename))
1043 except Exception, err: # pylint: disable-msg=W0703
1044 return (LUVerifyCluster.ETYPE_ERROR,
1045 "Failed to load X509 certificate %s: %s" % (filename, err))
1047 # Depending on the pyOpenSSL version, this can just return (None, None)
1048 (not_before, not_after) = utils.GetX509CertValidity(cert)
1050 return _VerifyCertificateInner(filename, cert.has_expired(),
1051 not_before, not_after, time.time())
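# Illustrative sketch only: _VerifyCertificate returns either (None, None)
# when the certificate checks out, or a pair such as
# (LUVerifyCluster.ETYPE_WARNING, "Certificate ... expires in 12 days").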
1054 class LUVerifyCluster(LogicalUnit):
1055 """Verifies the cluster status.
1058 HPATH = "cluster-verify"
1059 HTYPE = constants.HTYPE_CLUSTER
1060 _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1063 TCLUSTER = "cluster"
TINSTANCE = "instance"
TNODE = "node"

ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1068 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1069 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1070 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1071 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1072 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1074 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1075 ENODEDRBD = (TNODE, "ENODEDRBD")
1076 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1077 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1078 ENODEHV = (TNODE, "ENODEHV")
1079 ENODELVM = (TNODE, "ENODELVM")
1080 ENODEN1 = (TNODE, "ENODEN1")
1081 ENODENET = (TNODE, "ENODENET")
1082 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1083 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1084 ENODERPC = (TNODE, "ENODERPC")
1085 ENODESSH = (TNODE, "ENODESSH")
1086 ENODEVERSION = (TNODE, "ENODEVERSION")
1087 ENODESETUP = (TNODE, "ENODESETUP")
1088 ENODETIME = (TNODE, "ENODETIME")
1090 ETYPE_FIELD = "code"
1091 ETYPE_ERROR = "ERROR"
1092 ETYPE_WARNING = "WARNING"
1094 def ExpandNames(self):
1095 self.needed_locks = {
1096 locking.LEVEL_NODE: locking.ALL_SET,
1097 locking.LEVEL_INSTANCE: locking.ALL_SET,
1099 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1101 def _Error(self, ecode, item, msg, *args, **kwargs):
1102 """Format an error message.
1104 Based on the opcode's error_codes parameter, either format a
1105 parseable error code, or a simpler error string.
1107 This must be called only from Exec and functions called from Exec.
1110 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1112 # first complete the msg
1115 # then format the whole message
1116 if self.op.error_codes:
1117 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1123 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1124 # and finally report it via the feedback_fn
1125 self._feedback_fn(" - %s" % msg)
1127 def _ErrorIf(self, cond, *args, **kwargs):
1128 """Log an error message if the passed condition is True.
cond = bool(cond) or self.op.debug_simulate_errors
if cond:
  self._Error(*args, **kwargs)
1134 # do not mark the operation as failed for WARN cases only
1135 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1136 self.bad = self.bad or cond
1138 def _VerifyNode(self, nodeinfo, file_list, local_cksum,
1139 node_result, master_files, drbd_map, vg_name):
1140 """Run multiple tests against a node.
1144 - compares ganeti version
1145 - checks vg existence and size > 20G
1146 - checks config file checksum
1147 - checks ssh to other nodes
1149 @type nodeinfo: L{objects.Node}
1150 @param nodeinfo: the node to check
1151 @param file_list: required list of files
1152 @param local_cksum: dictionary of local files and their checksums
1153 @param node_result: the results from the node
1154 @param master_files: list of files that only masters should have
1155 @param drbd_map: the useddrbd minors for this node, in
1156 form of minor: (instance, must_exist) which correspond to instances
1157 and their running status
1158 @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
1161 node = nodeinfo.name
1162 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1164 # main result, node_result should be a non-empty dict
1165 test = not node_result or not isinstance(node_result, dict)
1166 _ErrorIf(test, self.ENODERPC, node,
1167 "unable to verify node: no data returned")
1171 # compares ganeti version
1172 local_version = constants.PROTOCOL_VERSION
1173 remote_version = node_result.get('version', None)
1174 test = not (remote_version and
1175 isinstance(remote_version, (list, tuple)) and
1176 len(remote_version) == 2)
1177 _ErrorIf(test, self.ENODERPC, node,
1178 "connection to node returned invalid data")
1182 test = local_version != remote_version[0]
1183 _ErrorIf(test, self.ENODEVERSION, node,
1184 "incompatible protocol versions: master %s,"
1185 " node %s", local_version, remote_version[0])
1189 # node seems compatible, we can actually try to look into its results
1191 # full package version
1192 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1193 self.ENODEVERSION, node,
1194 "software version mismatch: master %s, node %s",
1195 constants.RELEASE_VERSION, remote_version[1],
1196 code=self.ETYPE_WARNING)
1198 # checks vg existence and size > 20G
1199 if vg_name is not None:
vglist = node_result.get(constants.NV_VGLIST, None)
test = not vglist
_ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
if not test:
  vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                        constants.MIN_VG_SIZE)
  _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1208 # checks config file checksum
1210 remote_cksum = node_result.get(constants.NV_FILELIST, None)
1211 test = not isinstance(remote_cksum, dict)
1212 _ErrorIf(test, self.ENODEFILECHECK, node,
1213 "node hasn't returned file checksum data")
1215 for file_name in file_list:
1216 node_is_mc = nodeinfo.master_candidate
1217 must_have = (file_name not in master_files) or node_is_mc
1219 test1 = file_name not in remote_cksum
1221 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1223 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1224 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1225 "file '%s' missing", file_name)
1226 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1227 "file '%s' has wrong checksum", file_name)
1228 # not candidate and this is not a must-have file
1229 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1230 "file '%s' should not exist on non master"
1231 " candidates (and the file is outdated)", file_name)
1232 # all good, except non-master/non-must have combination
1233 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1234 "file '%s' should not exist"
1235 " on non master candidates", file_name)
1239 test = constants.NV_NODELIST not in node_result
1240 _ErrorIf(test, self.ENODESSH, node,
1241 "node hasn't returned node ssh connectivity data")
1243 if node_result[constants.NV_NODELIST]:
1244 for a_node, a_msg in node_result[constants.NV_NODELIST].items():
1245 _ErrorIf(True, self.ENODESSH, node,
1246 "ssh communication with node '%s': %s", a_node, a_msg)
1248 test = constants.NV_NODENETTEST not in node_result
1249 _ErrorIf(test, self.ENODENET, node,
1250 "node hasn't returned node tcp connectivity data")
1252 if node_result[constants.NV_NODENETTEST]:
1253 nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
1255 _ErrorIf(True, self.ENODENET, node,
1256 "tcp communication with node '%s': %s",
1257 anode, node_result[constants.NV_NODENETTEST][anode])
1259 hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
1260 if isinstance(hyp_result, dict):
1261 for hv_name, hv_result in hyp_result.iteritems():
1262 test = hv_result is not None
1263 _ErrorIf(test, self.ENODEHV, node,
1264 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1266 # check used drbd list
1267 if vg_name is not None:
1268 used_minors = node_result.get(constants.NV_DRBDLIST, [])
1269 test = not isinstance(used_minors, (tuple, list))
1270 _ErrorIf(test, self.ENODEDRBD, node,
1271 "cannot parse drbd status file: %s", str(used_minors))
1273 for minor, (iname, must_exist) in drbd_map.items():
1274 test = minor not in used_minors and must_exist
1275 _ErrorIf(test, self.ENODEDRBD, node,
1276 "drbd minor %d of instance %s is not active",
1278 for minor in used_minors:
1279 test = minor not in drbd_map
1280 _ErrorIf(test, self.ENODEDRBD, node,
1281 "unallocated drbd minor %d is in use", minor)
1282 test = node_result.get(constants.NV_NODESETUP,
1283 ["Missing NODESETUP results"])
1284 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1288 if vg_name is not None:
1289 pvlist = node_result.get(constants.NV_PVLIST, None)
1290 test = pvlist is None
1291 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1293 # check that ':' is not present in PV names, since it's a
1294 # special character for lvcreate (denotes the range of PEs to
1296 for _, pvname, owner_vg in pvlist:
1297 test = ":" in pvname
1298 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1299 " '%s' of VG '%s'", pvname, owner_vg)
1301 def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
1302 node_instance, n_offline):
1303 """Verify an instance.
1305 This function checks to see if the required block devices are
1306 available on the instance's node.
1309 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1310 node_current = instanceconfig.primary_node
1312 node_vol_should = {}
1313 instanceconfig.MapLVsByNode(node_vol_should)
1315 for node in node_vol_should:
1316 if node in n_offline:
1317 # ignore missing volumes on offline nodes
1319 for volume in node_vol_should[node]:
1320 test = node not in node_vol_is or volume not in node_vol_is[node]
1321 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1322 "volume %s missing on node %s", volume, node)
1324 if instanceconfig.admin_up:
1325 test = ((node_current not in node_instance or
1326 not instance in node_instance[node_current]) and
1327 node_current not in n_offline)
1328 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1329 "instance not running on its primary node %s",
1332 for node in node_instance:
if node != node_current:
1334 test = instance in node_instance[node]
1335 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1336 "instance should not run on node %s", node)
1338 def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
1339 """Verify if there are any unknown volumes in the cluster.
1341 The .os, .swap and backup volumes are ignored. All other volumes are
1342 reported as unknown.
1345 for node in node_vol_is:
1346 for volume in node_vol_is[node]:
1347 test = (node not in node_vol_should or
1348 volume not in node_vol_should[node])
1349 self._ErrorIf(test, self.ENODEORPHANLV, node,
1350 "volume %s is unknown", volume)
1352 def _VerifyOrphanInstances(self, instancelist, node_instance):
1353 """Verify the list of running instances.
1355 This checks what instances are running but unknown to the cluster.
1358 for node in node_instance:
1359 for o_inst in node_instance[node]:
1360 test = o_inst not in instancelist
1361 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1362 "instance %s on node %s should not exist", o_inst, node)
1364 def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
1365 """Verify N+1 Memory Resilience.
1367 Check that if one single node dies we can still start all the instances it
1371 for node, nodeinfo in node_info.iteritems():
1372 # This code checks that every node which is now listed as secondary has
1373 # enough memory to host all instances it is supposed to should a single
1374 # other node in the cluster fail.
1375 # FIXME: not ready for failover to an arbitrary node
1376 # FIXME: does not support file-backed instances
1377 # WARNING: we currently take into account down instances as well as up
1378 # ones, considering that even if they're down someone might want to start
1379 # them even in the event of a node failure.
1380 for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1382 for instance in instances:
1383 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1384 if bep[constants.BE_AUTO_BALANCE]:
1385 needed_mem += bep[constants.BE_MEMORY]
1386 test = nodeinfo['mfree'] < needed_mem
self._ErrorIf(test, self.ENODEN1, node,
              "not enough memory on this node to accommodate"
              " failovers should peer node %s fail", prinode)
1391 def CheckPrereq(self):
1392 """Check prerequisites.
1394 Transform the list of checks we're going to skip into a set and check that
1395 all its members are valid.
1398 self.skip_set = frozenset(self.op.skip_checks)
1399 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1400 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1403 def BuildHooksEnv(self):
1406 Cluster-Verify hooks just ran in the post phase and their failure makes
1407 the output be logged in the verify output and the verification to fail.
1410 all_nodes = self.cfg.GetNodeList()
1412 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1414 for node in self.cfg.GetAllNodesInfo().values():
1415 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1417 return env, [], all_nodes
1419 def Exec(self, feedback_fn):
1420 """Verify integrity of cluster, performing various test on nodes.
1424 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1425 verbose = self.op.verbose
1426 self._feedback_fn = feedback_fn
1427 feedback_fn("* Verifying global settings")
1428 for msg in self.cfg.VerifyConfig():
1429 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1431 # Check the cluster certificates
1432 for cert_filename in constants.ALL_CERT_FILES:
1433 (errcode, msg) = _VerifyCertificate(cert_filename)
1434 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1436 vg_name = self.cfg.GetVGName()
1437 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1438 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1439 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1440 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1441 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1442 for iname in instancelist)
1443 i_non_redundant = [] # Non redundant instances
1444 i_non_a_balanced = [] # Non auto-balanced instances
1445 n_offline = [] # List of offline nodes
1446 n_drained = [] # List of nodes being drained
1452 # FIXME: verify OS list
1453 # do local checksums
1454 master_files = [constants.CLUSTER_CONF_FILE]
1456 file_names = ssconf.SimpleStore().GetFileList()
1457 file_names.extend(constants.ALL_CERT_FILES)
1458 file_names.extend(master_files)
1460 local_checksums = utils.FingerprintFiles(file_names)
1462 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1463 node_verify_param = {
1464 constants.NV_FILELIST: file_names,
1465 constants.NV_NODELIST: [node.name for node in nodeinfo
1466 if not node.offline],
1467 constants.NV_HYPERVISOR: hypervisors,
1468 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1469 node.secondary_ip) for node in nodeinfo
1470 if not node.offline],
1471 constants.NV_INSTANCELIST: hypervisors,
1472 constants.NV_VERSION: None,
1473 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1474 constants.NV_NODESETUP: None,
1475 constants.NV_TIME: None,
1478 if vg_name is not None:
1479 node_verify_param[constants.NV_VGLIST] = None
1480 node_verify_param[constants.NV_LVLIST] = vg_name
1481 node_verify_param[constants.NV_PVLIST] = [vg_name]
1482 node_verify_param[constants.NV_DRBDLIST] = None
1484 # Due to the way our RPC system works, exact response times cannot be
1485 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1486 # time before and after executing the request, we can at least have a time
1488 nvinfo_starttime = time.time()
1489 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1490 self.cfg.GetClusterName())
1491 nvinfo_endtime = time.time()
1493 cluster = self.cfg.GetClusterInfo()
1494 master_node = self.cfg.GetMasterNode()
1495 all_drbd_map = self.cfg.ComputeDRBDMap()
1497 feedback_fn("* Verifying node status")
for node_i in nodeinfo:
  node = node_i.name

  if node_i.offline:
    feedback_fn("* Skipping offline node %s" % (node,))
    n_offline.append(node)
    continue
if node == master_node:
  ntype = "master"
elif node_i.master_candidate:
  ntype = "master candidate"
elif node_i.drained:
  ntype = "drained"
  n_drained.append(node)
else:
  ntype = "regular"
1517 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1519 msg = all_nvinfo[node].fail_msg
_ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
if msg:
  continue
1524 nresult = all_nvinfo[node].payload
node_drbd = {}
for minor, instance in all_drbd_map[node].items():
1527 test = instance not in instanceinfo
1528 _ErrorIf(test, self.ECLUSTERCFG, None,
1529 "ghost instance '%s' in temporary DRBD map", instance)
1530 # ghost instance should not be running, but otherwise we
1531 # don't give double warnings (both ghost instance and
1532 # unallocated minor in use)
if test:
  node_drbd[minor] = (instance, False)
else:
  instance = instanceinfo[instance]
  node_drbd[minor] = (instance.name, instance.admin_up)
1539 self._VerifyNode(node_i, file_names, local_checksums,
1540 nresult, master_files, node_drbd, vg_name)
lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
if vg_name is None:
  node_volume[node] = {}
1545 elif isinstance(lvdata, basestring):
1546 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1547 utils.SafeEncode(lvdata))
1548 node_volume[node] = {}
1549 elif not isinstance(lvdata, dict):
1550 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1553 node_volume[node] = lvdata
1556 idata = nresult.get(constants.NV_INSTANCELIST, None)
1557 test = not isinstance(idata, list)
1558 _ErrorIf(test, self.ENODEHV, node,
1559 "rpc call to node failed (instancelist): %s",
1560 utils.SafeEncode(str(idata)))
if test:
  continue

node_instance[node] = idata
1567 nodeinfo = nresult.get(constants.NV_HVINFO, None)
1568 test = not isinstance(nodeinfo, dict)
1569 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1574 ntime = nresult.get(constants.NV_TIME, None)
1576 ntime_merged = utils.MergeTime(ntime)
1577 except (ValueError, TypeError):
1578 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1580 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1581 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1582 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1583 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1587 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1588 "Node time diverges by at least %s from master node time",
1591 if ntime_diff is not None:
1596 "mfree": int(nodeinfo['memory_free']),
1599 # dictionary holding all instances this node is secondary for,
1600 # grouped by their primary node. Each key is a cluster node, and each
1601 # value is a list of instances which have the key as primary and the
1602 # current node as secondary. this is handy to calculate N+1 memory
1603 # availability if you can only failover from a primary to its
1605 "sinst-by-pnode": {},
1607 # FIXME: devise a free space model for file based instances as well
1608 if vg_name is not None:
1609 test = (constants.NV_VGLIST not in nresult or
1610 vg_name not in nresult[constants.NV_VGLIST])
1611 _ErrorIf(test, self.ENODELVM, node,
1612 "node didn't return data for the volume group '%s'"
1613 " - it is either missing or broken", vg_name)
1616 node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1617 except (ValueError, KeyError):
1618 _ErrorIf(True, self.ENODERPC, node,
1619 "node returned invalid nodeinfo, check lvm/hypervisor")
1622 node_vol_should = {}
1624 feedback_fn("* Verifying instance status")
1625 for instance in instancelist:
1627 feedback_fn("* Verifying instance %s" % instance)
1628 inst_config = instanceinfo[instance]
1629 self._VerifyInstance(instance, inst_config, node_volume,
1630 node_instance, n_offline)
1631 inst_nodes_offline = []
1633 inst_config.MapLVsByNode(node_vol_should)
1635 instance_cfg[instance] = inst_config
1637 pnode = inst_config.primary_node
1638 _ErrorIf(pnode not in node_info and pnode not in n_offline,
1639 self.ENODERPC, pnode, "instance %s, connection to"
1640 " primary node failed", instance)
1641 if pnode in node_info:
1642 node_info[pnode]['pinst'].append(instance)
1644 if pnode in n_offline:
1645 inst_nodes_offline.append(pnode)
1647 # If the instance is non-redundant we cannot survive losing its primary
1648 # node, so we are not N+1 compliant. On the other hand we have no disk
1649 # templates with more than one secondary so that situation is not well
1651 # FIXME: does not support file-backed instances
1652 if len(inst_config.secondary_nodes) == 0:
1653 i_non_redundant.append(instance)
1654 _ErrorIf(len(inst_config.secondary_nodes) > 1,
1655 self.EINSTANCELAYOUT, instance,
1656 "instance has multiple secondary nodes", code="WARNING")
1658 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1659 i_non_a_balanced.append(instance)
1661 for snode in inst_config.secondary_nodes:
1662 _ErrorIf(snode not in node_info and snode not in n_offline,
1663 self.ENODERPC, snode,
1664 "instance %s, connection to secondary node"
1665 " failed", instance)
1667 if snode in node_info:
1668 node_info[snode]['sinst'].append(instance)
1669 if pnode not in node_info[snode]['sinst-by-pnode']:
1670 node_info[snode]['sinst-by-pnode'][pnode] = []
1671 node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1673 if snode in n_offline:
1674 inst_nodes_offline.append(snode)
1676 # warn that the instance lives on offline nodes
1677 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1678 "instance lives on offline node(s) %s",
1679 utils.CommaJoin(inst_nodes_offline))
1681 feedback_fn("* Verifying orphan volumes")
1682 self._VerifyOrphanVolumes(node_vol_should, node_volume)
1684 feedback_fn("* Verifying remaining instances")
1685 self._VerifyOrphanInstances(instancelist, node_instance)
1687 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1688 feedback_fn("* Verifying N+1 Memory redundancy")
1689 self._VerifyNPlusOneMemory(node_info, instance_cfg)
1691 feedback_fn("* Other Notes")
if i_non_redundant:
  feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
              % len(i_non_redundant))
1696 if i_non_a_balanced:
1697 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1698 % len(i_non_a_balanced))
if n_offline:
  feedback_fn(" - NOTICE: %d offline node(s) found." % len(n_offline))
if n_drained:
  feedback_fn(" - NOTICE: %d drained node(s) found." % len(n_drained))

return not self.bad
1708 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1709 """Analyze the post-hooks' result
1711 This method analyses the hook result, handles it, and sends some
1712 nicely-formatted feedback back to the user.
1714 @param phase: one of L{constants.HOOKS_PHASE_POST} or
1715 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1716 @param hooks_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
1718 @param lu_result: previous Exec result
1719 @return: the new Exec result, based on the previous result
# We only really run POST phase hooks, and are only interested in
# their results
1725 if phase == constants.HOOKS_PHASE_POST:
1726 # Used to change hooks' output to proper indentation
1727 indent_re = re.compile('^', re.M)
1728 feedback_fn("* Hooks Results")
1729 assert hooks_results, "invalid result from hooks"
1731 for node_name in hooks_results:
1732 res = hooks_results[node_name]
msg = res.fail_msg
test = msg and not res.offline
1735 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1736 "Communication failure in hooks execution: %s", msg)
1737 if res.offline or msg:
1738 # No need to investigate payload if node is offline or gave an error.
1739 # override manually lu_result here as _ErrorIf only
# overrides self.bad
lu_result = 1
continue
1743 for script, hkr, output in res.payload:
1744 test = hkr == constants.HKR_FAIL
1745 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1746 "Script %s failed, output:", script)
if test:
  output = indent_re.sub(' ', output)
  feedback_fn("%s" % output)
  lu_result = 0

return lu_result
1755 class LUVerifyDisks(NoHooksLU):
1756 """Verifies the cluster disks status.
1762 def ExpandNames(self):
1763 self.needed_locks = {
1764 locking.LEVEL_NODE: locking.ALL_SET,
1765 locking.LEVEL_INSTANCE: locking.ALL_SET,
1767 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1769 def CheckPrereq(self):
1770 """Check prerequisites.
1772 This has no prerequisites.
1777 def Exec(self, feedback_fn):
1778 """Verify integrity of cluster disks.
1780 @rtype: tuple of three items
1781 @return: a tuple of (dict of node-to-node_error, list of instances
which need activate-disks, dict of instance: (node, volume) for
missing volumes
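# Illustrative sketch only: a possible return value, with made-up names --
# ({"node3.example.tld": "rpc failure"}, ["instance2.example.tld"],
#  {"instance5.example.tld": [("node1.example.tld", "xenvg/disk0")]})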
1786 result = res_nodes, res_instances, res_missing = {}, [], {}
1788 vg_name = self.cfg.GetVGName()
1789 nodes = utils.NiceSort(self.cfg.GetNodeList())
1790 instances = [self.cfg.GetInstanceInfo(name)
1791 for name in self.cfg.GetInstanceList()]
nv_dict = {}
for inst in instances:
  inst_lvs = {}
  if (not inst.admin_up or
      inst.disk_template not in constants.DTS_NET_MIRROR):
    continue
  inst.MapLVsByNode(inst_lvs)
1800 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1801 for node, vol_list in inst_lvs.iteritems():
1802 for vol in vol_list:
1803 nv_dict[(node, vol)] = inst
1808 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
for node in nodes:
  node_res = node_lvs[node]
  if node_res.offline:
    continue
  msg = node_res.fail_msg
  if msg:
    logging.warning("Error enumerating LVs on node %s: %s", node, msg)
    res_nodes[node] = msg
    continue
1821 lvs = node_res.payload
1822 for lv_name, (_, _, lv_online) in lvs.items():
1823 inst = nv_dict.pop((node, lv_name), None)
1824 if (not lv_online and inst is not None
1825 and inst.name not in res_instances):
1826 res_instances.append(inst.name)
1828 # any leftover items in nv_dict are missing LVs, let's arrange the
1830 for key, inst in nv_dict.iteritems():
1831 if inst.name not in res_missing:
1832 res_missing[inst.name] = []
res_missing[inst.name].append(key)

return result
1838 class LURepairDiskSizes(NoHooksLU):
1839 """Verifies the cluster disks sizes.
1842 _OP_REQP = ["instances"]
1845 def ExpandNames(self):
1846 if not isinstance(self.op.instances, list):
raise errors.OpPrereqError("Invalid argument type 'instances'",
                           errors.ECODE_INVAL)
1850 if self.op.instances:
1851 self.wanted_names = []
1852 for name in self.op.instances:
1853 full_name = _ExpandInstanceName(self.cfg, name)
1854 self.wanted_names.append(full_name)
1855 self.needed_locks = {
1856 locking.LEVEL_NODE: [],
1857 locking.LEVEL_INSTANCE: self.wanted_names,
1859 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
else:
  self.wanted_names = None
1862 self.needed_locks = {
1863 locking.LEVEL_NODE: locking.ALL_SET,
1864 locking.LEVEL_INSTANCE: locking.ALL_SET,
1866 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
1868 def DeclareLocks(self, level):
1869 if level == locking.LEVEL_NODE and self.wanted_names is not None:
1870 self._LockInstancesNodes(primary_only=True)
1872 def CheckPrereq(self):
1873 """Check prerequisites.
1875 This only checks the optional instance list against the existing names.
1878 if self.wanted_names is None:
1879 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
1881 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
1882 in self.wanted_names]
1884 def _EnsureChildSizes(self, disk):
1885 """Ensure children of the disk have the needed disk size.
1887 This is valid mainly for DRBD8 and fixes an issue where the
1888 children have a smaller disk size than the parent.
1890 @param disk: an L{ganeti.objects.Disk} object
1893 if disk.dev_type == constants.LD_DRBD8:
1894 assert disk.children, "Empty children for DRBD8?"
1895 fchild = disk.children[0]
1896 mismatch = fchild.size < disk.size
1898 self.LogInfo("Child disk has size %d, parent %d, fixing",
1899 fchild.size, disk.size)
1900 fchild.size = disk.size
1902 # and we recurse on this child only, not on the metadev
1903 return self._EnsureChildSizes(fchild) or mismatch
1907 def Exec(self, feedback_fn):
1908 """Verify the size of cluster disks.
1911 # TODO: check child disks too
1912 # TODO: check differences in size between primary/secondary nodes
1914 for instance in self.wanted_instances:
1915 pnode = instance.primary_node
1916 if pnode not in per_node_disks:
1917 per_node_disks[pnode] = []
1918 for idx, disk in enumerate(instance.disks):
1919 per_node_disks[pnode].append((instance, idx, disk))
1922 for node, dskl in per_node_disks.items():
1923 newl = [v[2].Copy() for v in dskl]
1925 self.cfg.SetDiskID(dsk, node)
1926 result = self.rpc.call_blockdev_getsizes(node, newl)
1928 self.LogWarning("Failure in blockdev_getsizes call to node"
1929 " %s, ignoring", node)
1931 if len(result.data) != len(dskl):
1932 self.LogWarning("Invalid result from node %s, ignoring node results",
1935 for ((instance, idx, disk), size) in zip(dskl, result.data):
1937 self.LogWarning("Disk %d of instance %s did not return size"
1938 " information, ignoring", idx, instance.name)
1940 if not isinstance(size, (int, long)):
1941 self.LogWarning("Disk %d of instance %s did not return valid"
1942 " size information, ignoring", idx, instance.name)
1945 if size != disk.size:
1946 self.LogInfo("Disk %d of instance %s has mismatched size,"
1947 " correcting: recorded %d, actual %d", idx,
1948 instance.name, disk.size, size)
1950 self.cfg.Update(instance, feedback_fn)
1951 changed.append((instance.name, idx, size))
1952 if self._EnsureChildSizes(disk):
1953 self.cfg.Update(instance, feedback_fn)
1954 changed.append((instance.name, idx, disk.size))
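# Illustrative sketch (not part of the original module): the size-repair rule
# applied by _EnsureChildSizes above, reduced to a pair of plain sizes. The
# helper name is hypothetical.
def _ExampleGrowChildToParent(parent_size, child_size):
  """Return (new_child_size, changed) so the child is never smaller."""
  if child_size < parent_size:
    return parent_size, True
  return child_size, False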
1958 class LURenameCluster(LogicalUnit):
1959 """Rename the cluster.
1962 HPATH = "cluster-rename"
1963 HTYPE = constants.HTYPE_CLUSTER
1966 def BuildHooksEnv(self):
1971 "OP_TARGET": self.cfg.GetClusterName(),
1972 "NEW_NAME": self.op.name,
1974 mn = self.cfg.GetMasterNode()
1975 all_nodes = self.cfg.GetNodeList()
1976 return env, [mn], all_nodes
1978 def CheckPrereq(self):
1979 """Verify that the passed name is a valid one.
1982 hostname = utils.GetHostInfo(self.op.name)
1984 new_name = hostname.name
1985 self.ip = new_ip = hostname.ip
1986 old_name = self.cfg.GetClusterName()
1987 old_ip = self.cfg.GetMasterIP()
1988 if new_name == old_name and new_ip == old_ip:
1989 raise errors.OpPrereqError("Neither the name nor the IP address of the"
1990 " cluster has changed",
1992 if new_ip != old_ip:
1993 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1994 raise errors.OpPrereqError("The given cluster IP address (%s) is"
1995 " reachable on the network. Aborting." %
1996 new_ip, errors.ECODE_NOTUNIQUE)
1998 self.op.name = new_name
2000 def Exec(self, feedback_fn):
2001 """Rename the cluster.
2004 clustername = self.op.name
2007 # shutdown the master IP
2008 master = self.cfg.GetMasterNode()
2009 result = self.rpc.call_node_stop_master(master, False)
2010 result.Raise("Could not disable the master role")
2013 cluster = self.cfg.GetClusterInfo()
2014 cluster.cluster_name = clustername
2015 cluster.master_ip = ip
2016 self.cfg.Update(cluster, feedback_fn)
2018 # update the known hosts file
2019 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2020 node_list = self.cfg.GetNodeList()
2022 node_list.remove(master)
2025 result = self.rpc.call_upload_file(node_list,
2026 constants.SSH_KNOWN_HOSTS_FILE)
2027 for to_node, to_result in result.iteritems():
2028 msg = to_result.fail_msg
2030 msg = ("Copy of file %s to node %s failed: %s" %
2031 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2032 self.proc.LogWarning(msg)
2035 result = self.rpc.call_node_start_master(master, False, False)
2036 msg = result.fail_msg
2038 self.LogWarning("Could not re-enable the master role on"
2039 " the master, please restart manually: %s", msg)
2042 def _RecursiveCheckIfLVMBased(disk):
2043 """Check if the given disk or its children are lvm-based.
2045 @type disk: L{objects.Disk}
2046 @param disk: the disk to check
2048 @return: boolean indicating whether an LD_LV dev_type was found or not
2052 for chdisk in disk.children:
2053 if _RecursiveCheckIfLVMBased(chdisk):
2055 return disk.dev_type == constants.LD_LV
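# Illustrative sketch (not part of the original module): the same bottom-up
# recursion as _RecursiveCheckIfLVMBased, applied to plain
# (dev_type, children) tuples instead of objects.Disk; the string "lvm"
# stands in for constants.LD_LV and is hypothetical.
def _ExampleIsLvmBased(dev):
  dev_type, children = dev
  for child in children:
    if _ExampleIsLvmBased(child):
      return True
  return dev_type == "lvm"

# e.g. a DRBD disk over two LVM children:
# _ExampleIsLvmBased(("drbd8", [("lvm", []), ("lvm", [])])) -> True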
2058 class LUSetClusterParams(LogicalUnit):
2059 """Change the parameters of the cluster.
2062 HPATH = "cluster-modify"
2063 HTYPE = constants.HTYPE_CLUSTER
2067 def CheckArguments(self):
2071 if not hasattr(self.op, "candidate_pool_size"):
2072 self.op.candidate_pool_size = None
2073 if self.op.candidate_pool_size is not None:
2075 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2076 except (ValueError, TypeError), err:
2077 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2078 str(err), errors.ECODE_INVAL)
2079 if self.op.candidate_pool_size < 1:
2080 raise errors.OpPrereqError("At least one master candidate needed",
2083 def ExpandNames(self):
2084 # FIXME: in the future maybe other cluster params won't require checking on
2085 # all nodes to be modified.
2086 self.needed_locks = {
2087 locking.LEVEL_NODE: locking.ALL_SET,
2089 self.share_locks[locking.LEVEL_NODE] = 1
2091 def BuildHooksEnv(self):
2096 "OP_TARGET": self.cfg.GetClusterName(),
2097 "NEW_VG_NAME": self.op.vg_name,
2099 mn = self.cfg.GetMasterNode()
2100 return env, [mn], [mn]
2102 def CheckPrereq(self):
2103 """Check prerequisites.
2105 This checks that the given parameters don't conflict and
2106 that the given volume group is valid.
2109 if self.op.vg_name is not None and not self.op.vg_name:
2110 instances = self.cfg.GetAllInstancesInfo().values()
2111 for inst in instances:
2112 for disk in inst.disks:
2113 if _RecursiveCheckIfLVMBased(disk):
2114 raise errors.OpPrereqError("Cannot disable lvm storage while"
2115 " lvm-based instances exist",
2118 node_list = self.acquired_locks[locking.LEVEL_NODE]
2120 # if vg_name is not None, check the given volume group on all nodes
2122 vglist = self.rpc.call_vg_list(node_list)
2123 for node in node_list:
2124 msg = vglist[node].fail_msg
2126 # ignoring down node
2127 self.LogWarning("Error while gathering data on node %s"
2128 " (ignoring node): %s", node, msg)
2130 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2132 constants.MIN_VG_SIZE)
2134 raise errors.OpPrereqError("Error on node '%s': %s" %
2135 (node, vgstatus), errors.ECODE_ENVIRON)
2137 self.cluster = cluster = self.cfg.GetClusterInfo()
2138 # validate params changes
2139 if self.op.beparams:
2140 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2141 self.new_beparams = objects.FillDict(
2142 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2144 if self.op.nicparams:
2145 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2146 self.new_nicparams = objects.FillDict(
2147 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2148 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2151 # check all instances for consistency
2152 for instance in self.cfg.GetAllInstancesInfo().values():
2153 for nic_idx, nic in enumerate(instance.nics):
2154 params_copy = copy.deepcopy(nic.nicparams)
2155 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2157 # check parameter syntax
2159 objects.NIC.CheckParameterSyntax(params_filled)
2160 except errors.ConfigurationError, err:
2161 nic_errors.append("Instance %s, nic/%d: %s" %
2162 (instance.name, nic_idx, err))
2164 # if we're moving instances to routed, check that they have an ip
2165 target_mode = params_filled[constants.NIC_MODE]
2166 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2167 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2168 (instance.name, nic_idx))
2170 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2171 "\n".join(nic_errors))
2173 # hypervisor list/parameters
2174 self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2175 if self.op.hvparams:
2176 if not isinstance(self.op.hvparams, dict):
2177 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2179 for hv_name, hv_dict in self.op.hvparams.items():
2180 if hv_name not in self.new_hvparams:
2181 self.new_hvparams[hv_name] = hv_dict
2183 self.new_hvparams[hv_name].update(hv_dict)
2185 # os hypervisor parameters
2186 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2188 if not isinstance(self.op.os_hvp, dict):
2189 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2191 for os_name, hvs in self.op.os_hvp.items():
2192 if not isinstance(hvs, dict):
2193 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2194 " input"), errors.ECODE_INVAL)
2195 if os_name not in self.new_os_hvp:
2196 self.new_os_hvp[os_name] = hvs
2198 for hv_name, hv_dict in hvs.items():
2199 if hv_name not in self.new_os_hvp[os_name]:
2200 self.new_os_hvp[os_name][hv_name] = hv_dict
2202 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2204 if self.op.enabled_hypervisors is not None:
2205 self.hv_list = self.op.enabled_hypervisors
2206 if not self.hv_list:
2207 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2208 " least one member",
2210 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2212 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2214 utils.CommaJoin(invalid_hvs),
2217 self.hv_list = cluster.enabled_hypervisors
2219 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2220 # either the enabled list has changed, or the parameters have, validate
2221 for hv_name, hv_params in self.new_hvparams.items():
2222 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2223 (self.op.enabled_hypervisors and
2224 hv_name in self.op.enabled_hypervisors)):
2225 # either this is a new hypervisor, or its parameters have changed
2226 hv_class = hypervisor.GetHypervisor(hv_name)
2227 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2228 hv_class.CheckParameterSyntax(hv_params)
2229 _CheckHVParams(self, node_list, hv_name, hv_params)
2232 # no need to check any newly-enabled hypervisors, since the
2233 # defaults have already been checked in the above code-block
2234 for os_name, os_hvp in self.new_os_hvp.items():
2235 for hv_name, hv_params in os_hvp.items():
2236 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2237 # we need to fill in the new os_hvp on top of the actual hv_p
2238 cluster_defaults = self.new_hvparams.get(hv_name, {})
2239 new_osp = objects.FillDict(cluster_defaults, hv_params)
2240 hv_class = hypervisor.GetHypervisor(hv_name)
2241 hv_class.CheckParameterSyntax(new_osp)
2242 _CheckHVParams(self, node_list, hv_name, new_osp)
2245 def Exec(self, feedback_fn):
2246 """Change the parameters of the cluster.
2249 if self.op.vg_name is not None:
2250 new_volume = self.op.vg_name
2253 if new_volume != self.cfg.GetVGName():
2254 self.cfg.SetVGName(new_volume)
2256 feedback_fn("Cluster LVM configuration already in desired"
2257 " state, not changing")
2258 if self.op.hvparams:
2259 self.cluster.hvparams = self.new_hvparams
2261 self.cluster.os_hvp = self.new_os_hvp
2262 if self.op.enabled_hypervisors is not None:
2263 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2264 if self.op.beparams:
2265 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2266 if self.op.nicparams:
2267 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2269 if self.op.candidate_pool_size is not None:
2270 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2271 # we need to update the pool size here, otherwise the save will fail
2272 _AdjustCandidatePool(self, [])
2274 self.cfg.Update(self.cluster, feedback_fn)
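# Illustrative sketch (not part of the original module): the "defaults plus
# overrides" layering that the parameter checks above rely on (via
# objects.FillDict), written against plain dictionaries. The helper name is
# hypothetical.
def _ExampleFillDict(defaults, overrides):
  """Return a copy of defaults updated with overrides, mutating neither."""
  result = dict(defaults)
  result.update(overrides)
  return result

# e.g. _ExampleFillDict({"mode": "bridged", "link": "xen-br0"}, {"link": "br1"})
# -> {"mode": "bridged", "link": "br1"}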
2277 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2278 """Distribute additional files which are part of the cluster configuration.
2280 ConfigWriter takes care of distributing the config and ssconf files, but
2281 there are more files which should be distributed to all nodes. This function
2282 makes sure those are copied.
2284 @param lu: calling logical unit
2285 @param additional_nodes: list of nodes not in the config to distribute to
2288 # 1. Gather target nodes
2289 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2290 dist_nodes = lu.cfg.GetOnlineNodeList()
2291 if additional_nodes is not None:
2292 dist_nodes.extend(additional_nodes)
2293 if myself.name in dist_nodes:
2294 dist_nodes.remove(myself.name)
2296 # 2. Gather files to distribute
2297 dist_files = set([constants.ETC_HOSTS,
2298 constants.SSH_KNOWN_HOSTS_FILE,
2299 constants.RAPI_CERT_FILE,
2300 constants.RAPI_USERS_FILE,
2301 constants.CONFD_HMAC_KEY,
2304 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2305 for hv_name in enabled_hypervisors:
2306 hv_class = hypervisor.GetHypervisor(hv_name)
2307 dist_files.update(hv_class.GetAncillaryFiles())
2309 # 3. Perform the files upload
2310 for fname in dist_files:
2311 if os.path.exists(fname):
2312 result = lu.rpc.call_upload_file(dist_nodes, fname)
2313 for to_node, to_result in result.items():
2314 msg = to_result.fail_msg
2316 msg = ("Copy of file %s to node %s failed: %s" %
2317 (fname, to_node, msg))
2318 lu.proc.LogWarning(msg)
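# Illustrative sketch (not part of the original module): the target-node
# computation done at the top of _RedistributeAncillaryFiles, on plain lists.
# All names are hypothetical.
def _ExampleDistributionTargets(online_nodes, master_name, additional=None):
  """Online nodes plus any extra nodes, with the master itself removed."""
  targets = list(online_nodes)
  if additional:
    targets.extend(additional)
  return [name for name in targets if name != master_name]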
2321 class LURedistributeConfig(NoHooksLU):
2322 """Force the redistribution of cluster configuration.
2324 This is a very simple LU.
2330 def ExpandNames(self):
2331 self.needed_locks = {
2332 locking.LEVEL_NODE: locking.ALL_SET,
2334 self.share_locks[locking.LEVEL_NODE] = 1
2336 def CheckPrereq(self):
2337 """Check prerequisites.
2341 def Exec(self, feedback_fn):
2342 """Redistribute the configuration.
2345 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2346 _RedistributeAncillaryFiles(self)
2349 def _WaitForSync(lu, instance, oneshot=False):
2350 """Sleep and poll for an instance's disk to sync.
2353 if not instance.disks:
2357 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2359 node = instance.primary_node
2361 for dev in instance.disks:
2362 lu.cfg.SetDiskID(dev, node)
2364 # TODO: Convert to utils.Retry
2367 degr_retries = 10 # in seconds, as we sleep 1 second each time
2371 cumul_degraded = False
2372 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2373 msg = rstats.fail_msg
2375 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2378 raise errors.RemoteError("Can't contact node %s for mirror data,"
2379 " aborting." % node)
2382 rstats = rstats.payload
2384 for i, mstat in enumerate(rstats):
2386 lu.LogWarning("Can't compute data for node %s/%s",
2387 node, instance.disks[i].iv_name)
2390 cumul_degraded = (cumul_degraded or
2391 (mstat.is_degraded and mstat.sync_percent is None))
2392 if mstat.sync_percent is not None:
2394 if mstat.estimated_time is not None:
2395 rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2396 max_time = mstat.estimated_time
2398 rem_time = "no time estimate"
2399 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2400 (instance.disks[i].iv_name, mstat.sync_percent,
2403 # if we're done but degraded, let's do a few small retries, to
2404 # make sure we see a stable and not a transient situation; therefore
2405 # we force a restart of the loop
2406 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2407 logging.info("Degraded disks found, %d retries left", degr_retries)
2415 time.sleep(min(60, max_time))
2418 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2419 return not cumul_degraded
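# Illustrative sketch (not part of the original module): the shape of the
# polling loop in _WaitForSync, with the RPC replaced by a caller-supplied
# poll function returning (done, degraded) and the sleep made injectable.
# All names are hypothetical and the sketch ignores time estimates.
def _ExampleWaitUntilSynced(poll_fn, sleep_fn, degr_retries=10):
  """Poll until done; allow a few extra rounds while still degraded."""
  while True:
    done, degraded = poll_fn()
    if done and degraded and degr_retries > 0:
      # apparently finished but degraded: retry a few times to make sure
      # this is a stable state and not a transient one
      degr_retries -= 1
      sleep_fn(1)
      continue
    if done:
      return not degraded
    sleep_fn(1)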
2422 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2423 """Check that mirrors are not degraded.
2425 The ldisk parameter, if True, will change the test from the
2426 is_degraded attribute (which represents overall non-ok status for
2427 the device(s)) to the ldisk (representing the local storage status).
2430 lu.cfg.SetDiskID(dev, node)
2434 if on_primary or dev.AssembleOnSecondary():
2435 rstats = lu.rpc.call_blockdev_find(node, dev)
2436 msg = rstats.fail_msg
2438 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2440 elif not rstats.payload:
2441 lu.LogWarning("Can't find disk on node %s", node)
2445 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2447 result = result and not rstats.payload.is_degraded
2450 for child in dev.children:
2451 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
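# Illustrative sketch (not part of the original module): the attribute
# selection made by _CheckDiskConsistency, on plain booleans instead of the
# RPC payload. Names are hypothetical.
def _ExampleDiskHealthy(ldisk_ok, is_degraded, ldisk=False):
  """With ldisk=True report local-storage health, otherwise overall status."""
  if ldisk:
    return ldisk_ok
  return not is_degraded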
2456 class LUDiagnoseOS(NoHooksLU):
2457 """Logical unit for OS diagnose/query.
2460 _OP_REQP = ["output_fields", "names"]
2462 _FIELDS_STATIC = utils.FieldSet()
2463 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2464 # Fields that need calculation of global os validity
2465 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2467 def ExpandNames(self):
2469 raise errors.OpPrereqError("Selective OS query not supported",
2472 _CheckOutputFields(static=self._FIELDS_STATIC,
2473 dynamic=self._FIELDS_DYNAMIC,
2474 selected=self.op.output_fields)
2476 # Lock all nodes, in shared mode
2477 # Temporary removal of locks, should be reverted later
2478 # TODO: reintroduce locks when they are lighter-weight
2479 self.needed_locks = {}
2480 #self.share_locks[locking.LEVEL_NODE] = 1
2481 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2483 def CheckPrereq(self):
2484 """Check prerequisites.
2489 def _DiagnoseByOS(rlist):
2490 """Remaps a per-node return list into an a per-os per-node dictionary
2492 @param rlist: a map with node names as keys and OS objects as values
2495 @return: a dictionary with osnames as keys and as value another map, with
2496 nodes as keys and tuples of (path, status, diagnose) as values, eg::
2498 {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2499 (/srv/..., False, "invalid api")],
2500 "node2": [(/srv/..., True, "")]}
2505 # we build here the list of nodes that didn't fail the RPC (at RPC
2506 # level), so that nodes with a non-responding node daemon don't
2507 # make all OSes invalid
2508 good_nodes = [node_name for node_name in rlist
2509 if not rlist[node_name].fail_msg]
2510 for node_name, nr in rlist.items():
2511 if nr.fail_msg or not nr.payload:
2513 for name, path, status, diagnose, variants in nr.payload:
2514 if name not in all_os:
2515 # build a list of nodes for this os containing empty lists
2516 # for each node in node_list
2518 for nname in good_nodes:
2519 all_os[name][nname] = []
2520 all_os[name][node_name].append((path, status, diagnose, variants))
2523 def Exec(self, feedback_fn):
2524 """Compute the list of OSes.
2527 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2528 node_data = self.rpc.call_os_diagnose(valid_nodes)
2529 pol = self._DiagnoseByOS(node_data)
2531 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2532 calc_variants = "variants" in self.op.output_fields
2534 for os_name, os_data in pol.items():
2539 for osl in os_data.values():
2540 valid = valid and osl and osl[0][1]
2545 node_variants = osl[0][3]
2546 if variants is None:
2547 variants = node_variants
2549 variants = [v for v in variants if v in node_variants]
2551 for field in self.op.output_fields:
2554 elif field == "valid":
2556 elif field == "node_status":
2557 # this is just a copy of the dict
2559 for node_name, nos_list in os_data.items():
2560 val[node_name] = nos_list
2561 elif field == "variants":
2564 raise errors.ParameterError(field)
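# Illustrative sketch (not part of the original module): how LUDiagnoseOS.Exec
# above narrows an OS variant list to the variants supported on every node,
# shown on plain lists. The helper name is hypothetical.
def _ExampleCommonVariants(per_node_variants):
  """Intersect variant lists node by node, keeping the first list's order."""
  variants = None
  for node_variants in per_node_variants:
    if variants is None:
      variants = list(node_variants)
    else:
      variants = [v for v in variants if v in node_variants]
  return variants or []

# e.g. _ExampleCommonVariants([["lenny", "squeeze"], ["squeeze"]]) -> ["squeeze"]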
2571 class LURemoveNode(LogicalUnit):
2572 """Logical unit for removing a node.
2575 HPATH = "node-remove"
2576 HTYPE = constants.HTYPE_NODE
2577 _OP_REQP = ["node_name"]
2579 def BuildHooksEnv(self):
2582 This doesn't run on the target node in the pre phase as a failed
2583 node would then be impossible to remove.
2587 "OP_TARGET": self.op.node_name,
2588 "NODE_NAME": self.op.node_name,
2590 all_nodes = self.cfg.GetNodeList()
2592 all_nodes.remove(self.op.node_name)
2594 logging.warning("Node %s which is about to be removed not found"
2595 " in the all nodes list", self.op.node_name)
2596 return env, all_nodes, all_nodes
2598 def CheckPrereq(self):
2599 """Check prerequisites.
2602 - the node exists in the configuration
2603 - it does not have primary or secondary instances
2604 - it's not the master
2606 Any errors are signaled by raising errors.OpPrereqError.
2609 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2610 node = self.cfg.GetNodeInfo(self.op.node_name)
2611 assert node is not None
2613 instance_list = self.cfg.GetInstanceList()
2615 masternode = self.cfg.GetMasterNode()
2616 if node.name == masternode:
2617 raise errors.OpPrereqError("Node is the master node,"
2618 " you need to failover first.",
2621 for instance_name in instance_list:
2622 instance = self.cfg.GetInstanceInfo(instance_name)
2623 if node.name in instance.all_nodes:
2624 raise errors.OpPrereqError("Instance %s is still running on the node,"
2625 " please remove first." % instance_name,
2627 self.op.node_name = node.name
2630 def Exec(self, feedback_fn):
2631 """Removes the node from the cluster.
2635 logging.info("Stopping the node daemon and removing configs from node %s",
2638 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2640 # Promote nodes to master candidate as needed
2641 _AdjustCandidatePool(self, exceptions=[node.name])
2642 self.context.RemoveNode(node.name)
2644 # Run post hooks on the node before it's removed
2645 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2647 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2649 # pylint: disable-msg=W0702
2650 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2652 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2653 msg = result.fail_msg
2655 self.LogWarning("Errors encountered on the remote node while leaving"
2656 " the cluster: %s", msg)
2659 class LUQueryNodes(NoHooksLU):
2660 """Logical unit for querying nodes.
2663 # pylint: disable-msg=W0142
2664 _OP_REQP = ["output_fields", "names", "use_locking"]
2667 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2668 "master_candidate", "offline", "drained"]
2670 _FIELDS_DYNAMIC = utils.FieldSet(
2672 "mtotal", "mnode", "mfree",
2674 "ctotal", "cnodes", "csockets",
2677 _FIELDS_STATIC = utils.FieldSet(*[
2678 "pinst_cnt", "sinst_cnt",
2679 "pinst_list", "sinst_list",
2680 "pip", "sip", "tags",
2682 "role"] + _SIMPLE_FIELDS
2685 def ExpandNames(self):
2686 _CheckOutputFields(static=self._FIELDS_STATIC,
2687 dynamic=self._FIELDS_DYNAMIC,
2688 selected=self.op.output_fields)
2690 self.needed_locks = {}
2691 self.share_locks[locking.LEVEL_NODE] = 1
2694 self.wanted = _GetWantedNodes(self, self.op.names)
2696 self.wanted = locking.ALL_SET
2698 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2699 self.do_locking = self.do_node_query and self.op.use_locking
2701 # if we don't request only static fields, we need to lock the nodes
2702 self.needed_locks[locking.LEVEL_NODE] = self.wanted
2704 def CheckPrereq(self):
2705 """Check prerequisites.
2708 # The validation of the node list is done in _GetWantedNodes if the
2709 # list is non-empty; if it is empty, there is no validation to do
2712 def Exec(self, feedback_fn):
2713 """Computes the list of nodes and their attributes.
2716 all_info = self.cfg.GetAllNodesInfo()
2718 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2719 elif self.wanted != locking.ALL_SET:
2720 nodenames = self.wanted
2721 missing = set(nodenames).difference(all_info.keys())
2723 raise errors.OpExecError(
2724 "Some nodes were removed before retrieving their data: %s" % missing)
2726 nodenames = all_info.keys()
2728 nodenames = utils.NiceSort(nodenames)
2729 nodelist = [all_info[name] for name in nodenames]
2731 # begin data gathering
2733 if self.do_node_query:
2735 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2736 self.cfg.GetHypervisorType())
2737 for name in nodenames:
2738 nodeinfo = node_data[name]
2739 if not nodeinfo.fail_msg and nodeinfo.payload:
2740 nodeinfo = nodeinfo.payload
2741 fn = utils.TryConvert
2743 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2744 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2745 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2746 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2747 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2748 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2749 "bootid": nodeinfo.get('bootid', None),
2750 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2751 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2754 live_data[name] = {}
2756 live_data = dict.fromkeys(nodenames, {})
2758 node_to_primary = dict([(name, set()) for name in nodenames])
2759 node_to_secondary = dict([(name, set()) for name in nodenames])
2761 inst_fields = frozenset(("pinst_cnt", "pinst_list",
2762 "sinst_cnt", "sinst_list"))
2763 if inst_fields & frozenset(self.op.output_fields):
2764 inst_data = self.cfg.GetAllInstancesInfo()
2766 for inst in inst_data.values():
2767 if inst.primary_node in node_to_primary:
2768 node_to_primary[inst.primary_node].add(inst.name)
2769 for secnode in inst.secondary_nodes:
2770 if secnode in node_to_secondary:
2771 node_to_secondary[secnode].add(inst.name)
2773 master_node = self.cfg.GetMasterNode()
2775 # end data gathering
2778 for node in nodelist:
2780 for field in self.op.output_fields:
2781 if field in self._SIMPLE_FIELDS:
2782 val = getattr(node, field)
2783 elif field == "pinst_list":
2784 val = list(node_to_primary[node.name])
2785 elif field == "sinst_list":
2786 val = list(node_to_secondary[node.name])
2787 elif field == "pinst_cnt":
2788 val = len(node_to_primary[node.name])
2789 elif field == "sinst_cnt":
2790 val = len(node_to_secondary[node.name])
2791 elif field == "pip":
2792 val = node.primary_ip
2793 elif field == "sip":
2794 val = node.secondary_ip
2795 elif field == "tags":
2796 val = list(node.GetTags())
2797 elif field == "master":
2798 val = node.name == master_node
2799 elif self._FIELDS_DYNAMIC.Matches(field):
2800 val = live_data[node.name].get(field, None)
2801 elif field == "role":
2802 if node.name == master_node:
2804 elif node.master_candidate:
2813 raise errors.ParameterError(field)
2814 node_output.append(val)
2815 output.append(node_output)
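# Illustrative sketch (not part of the original module): the per-node instance
# accounting used by LUQueryNodes for the pinst_*/sinst_* fields, on plain
# (name, primary_node, secondary_nodes) tuples. Names are hypothetical.
def _ExampleMapInstancesToNodes(node_names, instances):
  node_to_primary = dict((name, set()) for name in node_names)
  node_to_secondary = dict((name, set()) for name in node_names)
  for iname, pnode, snodes in instances:
    if pnode in node_to_primary:
      node_to_primary[pnode].add(iname)
    for snode in snodes:
      if snode in node_to_secondary:
        node_to_secondary[snode].add(iname)
  return node_to_primary, node_to_secondary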
2820 class LUQueryNodeVolumes(NoHooksLU):
2821 """Logical unit for getting volumes on node(s).
2824 _OP_REQP = ["nodes", "output_fields"]
2826 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2827 _FIELDS_STATIC = utils.FieldSet("node")
2829 def ExpandNames(self):
2830 _CheckOutputFields(static=self._FIELDS_STATIC,
2831 dynamic=self._FIELDS_DYNAMIC,
2832 selected=self.op.output_fields)
2834 self.needed_locks = {}
2835 self.share_locks[locking.LEVEL_NODE] = 1
2836 if not self.op.nodes:
2837 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2839 self.needed_locks[locking.LEVEL_NODE] = \
2840 _GetWantedNodes(self, self.op.nodes)
2842 def CheckPrereq(self):
2843 """Check prerequisites.
2845 This checks that the fields required are valid output fields.
2848 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2850 def Exec(self, feedback_fn):
2851 """Computes the list of nodes and their attributes.
2854 nodenames = self.nodes
2855 volumes = self.rpc.call_node_volumes(nodenames)
2857 ilist = [self.cfg.GetInstanceInfo(iname) for iname
2858 in self.cfg.GetInstanceList()]
2860 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2863 for node in nodenames:
2864 nresult = volumes[node]
2867 msg = nresult.fail_msg
2869 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2872 node_vols = nresult.payload[:]
2873 node_vols.sort(key=lambda vol: vol['dev'])
2875 for vol in node_vols:
2877 for field in self.op.output_fields:
2880 elif field == "phys":
2884 elif field == "name":
2886 elif field == "size":
2887 val = int(float(vol['size']))
2888 elif field == "instance":
2890 if node not in lv_by_node[inst]:
2892 if vol['name'] in lv_by_node[inst][node]:
2898 raise errors.ParameterError(field)
2899 node_output.append(str(val))
2901 output.append(node_output)
2906 class LUQueryNodeStorage(NoHooksLU):
2907 """Logical unit for getting information on storage units on node(s).
2910 _OP_REQP = ["nodes", "storage_type", "output_fields"]
2912 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
2914 def ExpandNames(self):
2915 storage_type = self.op.storage_type
2917 if storage_type not in constants.VALID_STORAGE_TYPES:
2918 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2921 _CheckOutputFields(static=self._FIELDS_STATIC,
2922 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
2923 selected=self.op.output_fields)
2925 self.needed_locks = {}
2926 self.share_locks[locking.LEVEL_NODE] = 1
2929 self.needed_locks[locking.LEVEL_NODE] = \
2930 _GetWantedNodes(self, self.op.nodes)
2932 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2934 def CheckPrereq(self):
2935 """Check prerequisites.
2937 This checks that the fields required are valid output fields.
2940 self.op.name = getattr(self.op, "name", None)
2942 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2944 def Exec(self, feedback_fn):
2945 """Computes the list of nodes and their attributes.
2948 # Always get name to sort by
2949 if constants.SF_NAME in self.op.output_fields:
2950 fields = self.op.output_fields[:]
2952 fields = [constants.SF_NAME] + self.op.output_fields
2954 # Never ask for node or type as it's only known to the LU
2955 for extra in [constants.SF_NODE, constants.SF_TYPE]:
2956 while extra in fields:
2957 fields.remove(extra)
2959 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2960 name_idx = field_idx[constants.SF_NAME]
2962 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2963 data = self.rpc.call_storage_list(self.nodes,
2964 self.op.storage_type, st_args,
2965 self.op.name, fields)
2969 for node in utils.NiceSort(self.nodes):
2970 nresult = data[node]
2974 msg = nresult.fail_msg
2976 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2979 rows = dict([(row[name_idx], row) for row in nresult.payload])
2981 for name in utils.NiceSort(rows.keys()):
2986 for field in self.op.output_fields:
2987 if field == constants.SF_NODE:
2989 elif field == constants.SF_TYPE:
2990 val = self.op.storage_type
2991 elif field in field_idx:
2992 val = row[field_idx[field]]
2994 raise errors.ParameterError(field)
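# Illustrative sketch (not part of the original module): the field-list
# preparation done by LUQueryNodeStorage.Exec, using the plain strings
# "name", "node" and "type" in place of the SF_* constants. The helper name
# is hypothetical.
def _ExampleStorageRpcFields(output_fields):
  """Always query the name field; drop fields only the LU itself knows."""
  if "name" in output_fields:
    fields = list(output_fields)
  else:
    fields = ["name"] + list(output_fields)
  return [f for f in fields if f not in ("node", "type")]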
3003 class LUModifyNodeStorage(NoHooksLU):
3004 """Logical unit for modifying a storage volume on a node.
3007 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3010 def CheckArguments(self):
3011 self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
3013 storage_type = self.op.storage_type
3014 if storage_type not in constants.VALID_STORAGE_TYPES:
3015 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
3018 def ExpandNames(self):
3019 self.needed_locks = {
3020 locking.LEVEL_NODE: self.op.node_name,
3023 def CheckPrereq(self):
3024 """Check prerequisites.
3027 storage_type = self.op.storage_type
3030 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3032 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3033 " modified" % storage_type,
3036 diff = set(self.op.changes.keys()) - modifiable
3038 raise errors.OpPrereqError("The following fields can not be modified for"
3039 " storage units of type '%s': %r" %
3040 (storage_type, list(diff)),
3043 def Exec(self, feedback_fn):
3044 """Computes the list of nodes and their attributes.
3047 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3048 result = self.rpc.call_storage_modify(self.op.node_name,
3049 self.op.storage_type, st_args,
3050 self.op.name, self.op.changes)
3051 result.Raise("Failed to modify storage unit '%s' on %s" %
3052 (self.op.name, self.op.node_name))
3055 class LUAddNode(LogicalUnit):
3056 """Logical unit for adding node to the cluster.
3060 HTYPE = constants.HTYPE_NODE
3061 _OP_REQP = ["node_name"]
3063 def CheckArguments(self):
3064 # validate/normalize the node name
3065 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3067 def BuildHooksEnv(self):
3070 This will run on all nodes before, and on all nodes + the new node after.
3074 "OP_TARGET": self.op.node_name,
3075 "NODE_NAME": self.op.node_name,
3076 "NODE_PIP": self.op.primary_ip,
3077 "NODE_SIP": self.op.secondary_ip,
3079 nodes_0 = self.cfg.GetNodeList()
3080 nodes_1 = nodes_0 + [self.op.node_name, ]
3081 return env, nodes_0, nodes_1
3083 def CheckPrereq(self):
3084 """Check prerequisites.
3087 - the new node is not already in the config
3089 - its parameters (single/dual homed) match the cluster
3091 Any errors are signaled by raising errors.OpPrereqError.
3094 node_name = self.op.node_name
3097 dns_data = utils.GetHostInfo(node_name)
3099 node = dns_data.name
3100 primary_ip = self.op.primary_ip = dns_data.ip
3101 secondary_ip = getattr(self.op, "secondary_ip", None)
3102 if secondary_ip is None:
3103 secondary_ip = primary_ip
3104 if not utils.IsValidIP(secondary_ip):
3105 raise errors.OpPrereqError("Invalid secondary IP given",
3107 self.op.secondary_ip = secondary_ip
3109 node_list = cfg.GetNodeList()
3110 if not self.op.readd and node in node_list:
3111 raise errors.OpPrereqError("Node %s is already in the configuration" %
3112 node, errors.ECODE_EXISTS)
3113 elif self.op.readd and node not in node_list:
3114 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3117 for existing_node_name in node_list:
3118 existing_node = cfg.GetNodeInfo(existing_node_name)
3120 if self.op.readd and node == existing_node_name:
3121 if (existing_node.primary_ip != primary_ip or
3122 existing_node.secondary_ip != secondary_ip):
3123 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3124 " address configuration as before",
3128 if (existing_node.primary_ip == primary_ip or
3129 existing_node.secondary_ip == primary_ip or
3130 existing_node.primary_ip == secondary_ip or
3131 existing_node.secondary_ip == secondary_ip):
3132 raise errors.OpPrereqError("New node ip address(es) conflict with"
3133 " existing node %s" % existing_node.name,
3134 errors.ECODE_NOTUNIQUE)
3136 # check that the type of the node (single versus dual homed) is the
3137 # same as for the master
3138 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3139 master_singlehomed = myself.secondary_ip == myself.primary_ip
3140 newbie_singlehomed = secondary_ip == primary_ip
3141 if master_singlehomed != newbie_singlehomed:
3142 if master_singlehomed:
3143 raise errors.OpPrereqError("The master has no private ip but the"
3144 " new node has one",
3147 raise errors.OpPrereqError("The master has a private ip but the"
3148 " new node doesn't have one",
3151 # checks reachability
3152 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3153 raise errors.OpPrereqError("Node not reachable by ping",
3154 errors.ECODE_ENVIRON)
3156 if not newbie_singlehomed:
3157 # check reachability from my secondary ip to newbie's secondary ip
3158 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3159 source=myself.secondary_ip):
3160 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3161 " based ping to noded port",
3162 errors.ECODE_ENVIRON)
3169 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3172 self.new_node = self.cfg.GetNodeInfo(node)
3173 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3175 self.new_node = objects.Node(name=node,
3176 primary_ip=primary_ip,
3177 secondary_ip=secondary_ip,
3178 master_candidate=self.master_candidate,
3179 offline=False, drained=False)
3181 def Exec(self, feedback_fn):
3182 """Adds the new node to the cluster.
3185 new_node = self.new_node
3186 node = new_node.name
3188 # for re-adds, reset the offline/drained/master-candidate flags;
3189 # we need to reset here, otherwise offline would prevent RPC calls
3190 # later in the procedure; this also means that if the re-add
3191 # fails, we are left with a non-offlined, broken node
3193 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3194 self.LogInfo("Readding a node, the offline/drained flags were reset")
3195 # if we demote the node, we do cleanup later in the procedure
3196 new_node.master_candidate = self.master_candidate
3198 # notify the user about any possible mc promotion
3199 if new_node.master_candidate:
3200 self.LogInfo("Node will be a master candidate")
3202 # check connectivity
3203 result = self.rpc.call_version([node])[node]
3204 result.Raise("Can't get version information from node %s" % node)
3205 if constants.PROTOCOL_VERSION == result.payload:
3206 logging.info("Communication to node %s fine, sw version %s match",
3207 node, result.payload)
3209 raise errors.OpExecError("Version mismatch master version %s,"
3210 " node version %s" %
3211 (constants.PROTOCOL_VERSION, result.payload))
3214 if self.cfg.GetClusterInfo().modify_ssh_setup:
3215 logging.info("Copy ssh key to node %s", node)
3216 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3218 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3219 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3223 keyarray.append(utils.ReadFile(i))
3225 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3226 keyarray[2], keyarray[3], keyarray[4],
3228 result.Raise("Cannot transfer ssh keys to the new node")
3230 # Add node to our /etc/hosts, and add key to known_hosts
3231 if self.cfg.GetClusterInfo().modify_etc_hosts:
3232 utils.AddHostToEtcHosts(new_node.name)
3234 if new_node.secondary_ip != new_node.primary_ip:
3235 result = self.rpc.call_node_has_ip_address(new_node.name,
3236 new_node.secondary_ip)
3237 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3238 prereq=True, ecode=errors.ECODE_ENVIRON)
3239 if not result.payload:
3240 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3241 " you gave (%s). Please fix and re-run this"
3242 " command." % new_node.secondary_ip)
3244 node_verify_list = [self.cfg.GetMasterNode()]
3245 node_verify_param = {
3246 constants.NV_NODELIST: [node],
3247 # TODO: do a node-net-test as well?
3250 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3251 self.cfg.GetClusterName())
3252 for verifier in node_verify_list:
3253 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3254 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3256 for failed in nl_payload:
3257 feedback_fn("ssh/hostname verification failed"
3258 " (checking from %s): %s" %
3259 (verifier, nl_payload[failed]))
3260 raise errors.OpExecError("ssh/hostname verification failed.")
3263 _RedistributeAncillaryFiles(self)
3264 self.context.ReaddNode(new_node)
3265 # make sure we redistribute the config
3266 self.cfg.Update(new_node, feedback_fn)
3267 # and make sure the new node will not have old files around
3268 if not new_node.master_candidate:
3269 result = self.rpc.call_node_demote_from_mc(new_node.name)
3270 msg = result.fail_msg
3272 self.LogWarning("Node failed to demote itself from master"
3273 " candidate status: %s" % msg)
3275 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3276 self.context.AddNode(new_node, self.proc.GetECId())
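# Illustrative sketch (not part of the original module): the single- versus
# dual-homed compatibility rule from LUAddNode.CheckPrereq, as a plain
# predicate on the four IP addresses involved. The helper name is
# hypothetical.
def _ExampleHomingMatches(master_pip, master_sip, new_pip, new_sip):
  """True when master and new node are both single- or both dual-homed."""
  return (master_pip == master_sip) == (new_pip == new_sip)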
3279 class LUSetNodeParams(LogicalUnit):
3280 """Modifies the parameters of a node.
3283 HPATH = "node-modify"
3284 HTYPE = constants.HTYPE_NODE
3285 _OP_REQP = ["node_name"]
3288 def CheckArguments(self):
3289 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3290 _CheckBooleanOpField(self.op, 'master_candidate')
3291 _CheckBooleanOpField(self.op, 'offline')
3292 _CheckBooleanOpField(self.op, 'drained')
3293 _CheckBooleanOpField(self.op, 'auto_promote')
3294 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3295 if all_mods.count(None) == 3:
3296 raise errors.OpPrereqError("Please pass at least one modification",
3298 if all_mods.count(True) > 1:
3299 raise errors.OpPrereqError("Can't set the node into more than one"
3300 " state at the same time",
3303 # Boolean value that tells us whether we're offlining or draining the node
3304 self.offline_or_drain = (self.op.offline == True or
3305 self.op.drained == True)
3306 self.deoffline_or_drain = (self.op.offline == False or
3307 self.op.drained == False)
3308 self.might_demote = (self.op.master_candidate == False or
3309 self.offline_or_drain)
3311 self.lock_all = self.op.auto_promote and self.might_demote
3314 def ExpandNames(self):
3316 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3318 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3320 def BuildHooksEnv(self):
3323 This runs on the master node.
3327 "OP_TARGET": self.op.node_name,
3328 "MASTER_CANDIDATE": str(self.op.master_candidate),
3329 "OFFLINE": str(self.op.offline),
3330 "DRAINED": str(self.op.drained),
3332 nl = [self.cfg.GetMasterNode(),
3336 def CheckPrereq(self):
3337 """Check prerequisites.
3339 This checks the requested flag changes against the node's current state.
3342 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3344 if (self.op.master_candidate is not None or
3345 self.op.drained is not None or
3346 self.op.offline is not None):
3347 # we can't change the master's node flags
3348 if self.op.node_name == self.cfg.GetMasterNode():
3349 raise errors.OpPrereqError("The master role can be changed"
3350 " only via masterfailover",
3354 if node.master_candidate and self.might_demote and not self.lock_all:
3355 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3356 # check if after removing the current node, we're missing master
3358 (mc_remaining, mc_should, _) = \
3359 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3360 if mc_remaining < mc_should:
3361 raise errors.OpPrereqError("Not enough master candidates, please"
3362 " pass auto_promote to allow promotion",
3365 if (self.op.master_candidate == True and
3366 ((node.offline and not self.op.offline == False) or
3367 (node.drained and not self.op.drained == False))):
3368 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3369 " to master_candidate" % node.name,
3372 # If we're being de-offlined or un-drained, promote ourselves to master candidate if needed
3373 if (self.deoffline_or_drain and not self.offline_or_drain and not
3374 self.op.master_candidate == True and not node.master_candidate):
3375 self.op.master_candidate = _DecideSelfPromotion(self)
3376 if self.op.master_candidate:
3377 self.LogInfo("Autopromoting node to master candidate")
3381 def Exec(self, feedback_fn):
3390 if self.op.offline is not None:
3391 node.offline = self.op.offline
3392 result.append(("offline", str(self.op.offline)))
3393 if self.op.offline == True:
3394 if node.master_candidate:
3395 node.master_candidate = False
3397 result.append(("master_candidate", "auto-demotion due to offline"))
3399 node.drained = False
3400 result.append(("drained", "clear drained status due to offline"))
3402 if self.op.master_candidate is not None:
3403 node.master_candidate = self.op.master_candidate
3405 result.append(("master_candidate", str(self.op.master_candidate)))
3406 if self.op.master_candidate == False:
3407 rrc = self.rpc.call_node_demote_from_mc(node.name)
3410 self.LogWarning("Node failed to demote itself: %s" % msg)
3412 if self.op.drained is not None:
3413 node.drained = self.op.drained
3414 result.append(("drained", str(self.op.drained)))
3415 if self.op.drained == True:
3416 if node.master_candidate:
3417 node.master_candidate = False
3419 result.append(("master_candidate", "auto-demotion due to drain"))
3420 rrc = self.rpc.call_node_demote_from_mc(node.name)
3423 self.LogWarning("Node failed to demote itself: %s" % msg)
3425 node.offline = False
3426 result.append(("offline", "clear offline status due to drain"))
3428 # we locked all nodes, we adjust the CP before updating this node
3430 _AdjustCandidatePool(self, [node.name])
3432 # this will trigger configuration file update, if needed
3433 self.cfg.Update(node, feedback_fn)
3435 # this will trigger job queue propagation or cleanup
3437 self.context.ReaddNode(node)
3442 class LUPowercycleNode(NoHooksLU):
3443 """Powercycles a node.
3446 _OP_REQP = ["node_name", "force"]
3449 def CheckArguments(self):
3450 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3451 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3452 raise errors.OpPrereqError("The node is the master and the force"
3453 " parameter was not set",
3456 def ExpandNames(self):
3457 """Locking for PowercycleNode.
3459 This is a last-resort option and shouldn't block on other
3460 jobs. Therefore, we grab no locks.
3463 self.needed_locks = {}
3465 def CheckPrereq(self):
3466 """Check prerequisites.
3468 This LU has no prereqs.
3473 def Exec(self, feedback_fn):
3477 result = self.rpc.call_node_powercycle(self.op.node_name,
3478 self.cfg.GetHypervisorType())
3479 result.Raise("Failed to schedule the reboot")
3480 return result.payload
3483 class LUQueryClusterInfo(NoHooksLU):
3484 """Query cluster configuration.
3490 def ExpandNames(self):
3491 self.needed_locks = {}
3493 def CheckPrereq(self):
3494 """No prerequsites needed for this LU.
3499 def Exec(self, feedback_fn):
3500 """Return cluster config.
3503 cluster = self.cfg.GetClusterInfo()
3506 # Filter just for enabled hypervisors
3507 for os_name, hv_dict in cluster.os_hvp.items():
3508 os_hvp[os_name] = {}
3509 for hv_name, hv_params in hv_dict.items():
3510 if hv_name in cluster.enabled_hypervisors:
3511 os_hvp[os_name][hv_name] = hv_params
3514 "software_version": constants.RELEASE_VERSION,
3515 "protocol_version": constants.PROTOCOL_VERSION,
3516 "config_version": constants.CONFIG_VERSION,
3517 "os_api_version": max(constants.OS_API_VERSIONS),
3518 "export_version": constants.EXPORT_VERSION,
3519 "architecture": (platform.architecture()[0], platform.machine()),
3520 "name": cluster.cluster_name,
3521 "master": cluster.master_node,
3522 "default_hypervisor": cluster.enabled_hypervisors[0],
3523 "enabled_hypervisors": cluster.enabled_hypervisors,
3524 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3525 for hypervisor_name in cluster.enabled_hypervisors]),
3527 "beparams": cluster.beparams,
3528 "nicparams": cluster.nicparams,
3529 "candidate_pool_size": cluster.candidate_pool_size,
3530 "master_netdev": cluster.master_netdev,
3531 "volume_group_name": cluster.volume_group_name,
3532 "file_storage_dir": cluster.file_storage_dir,
3533 "ctime": cluster.ctime,
3534 "mtime": cluster.mtime,
3535 "uuid": cluster.uuid,
3536 "tags": list(cluster.GetTags()),
3542 class LUQueryConfigValues(NoHooksLU):
3543 """Return configuration values.
3548 _FIELDS_DYNAMIC = utils.FieldSet()
3549 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3552 def ExpandNames(self):
3553 self.needed_locks = {}
3555 _CheckOutputFields(static=self._FIELDS_STATIC,
3556 dynamic=self._FIELDS_DYNAMIC,
3557 selected=self.op.output_fields)
3559 def CheckPrereq(self):
3560 """No prerequisites.
3565 def Exec(self, feedback_fn):
3566 """Dump a representation of the cluster config to the standard output.
3570 for field in self.op.output_fields:
3571 if field == "cluster_name":
3572 entry = self.cfg.GetClusterName()
3573 elif field == "master_node":
3574 entry = self.cfg.GetMasterNode()
3575 elif field == "drain_flag":
3576 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3577 elif field == "watcher_pause":
3578 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3580 raise errors.ParameterError(field)
3581 values.append(entry)
3585 class LUActivateInstanceDisks(NoHooksLU):
3586 """Bring up an instance's disks.
3589 _OP_REQP = ["instance_name"]
3592 def ExpandNames(self):
3593 self._ExpandAndLockInstance()
3594 self.needed_locks[locking.LEVEL_NODE] = []
3595 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3597 def DeclareLocks(self, level):
3598 if level == locking.LEVEL_NODE:
3599 self._LockInstancesNodes()
3601 def CheckPrereq(self):
3602 """Check prerequisites.
3604 This checks that the instance is in the cluster.
3607 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3608 assert self.instance is not None, \
3609 "Cannot retrieve locked instance %s" % self.op.instance_name
3610 _CheckNodeOnline(self, self.instance.primary_node)
3611 if not hasattr(self.op, "ignore_size"):
3612 self.op.ignore_size = False
3614 def Exec(self, feedback_fn):
3615 """Activate the disks.
3618 disks_ok, disks_info = \
3619 _AssembleInstanceDisks(self, self.instance,
3620 ignore_size=self.op.ignore_size)
3622 raise errors.OpExecError("Cannot activate block devices")
3627 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3629 """Prepare the block devices for an instance.
3631 This sets up the block devices on all nodes.
3633 @type lu: L{LogicalUnit}
3634 @param lu: the logical unit on whose behalf we execute
3635 @type instance: L{objects.Instance}
3636 @param instance: the instance for whose disks we assemble
3637 @type ignore_secondaries: boolean
3638 @param ignore_secondaries: if true, errors on secondary nodes
3639 won't result in an error return from the function
3640 @type ignore_size: boolean
3641 @param ignore_size: if true, the current known size of the disk
3642 will not be used during the disk activation, useful for cases
3643 when the size is wrong
3644 @return: False if the operation failed, otherwise a list of
3645 (host, instance_visible_name, node_visible_name)
3646 with the mapping from node devices to instance devices
3651 iname = instance.name
3652 # With the two-pass mechanism we try to reduce the window of
3653 # opportunity for the race condition of switching DRBD to primary
3654 # before handshaking occurred, but we do not eliminate it
3656 # The proper fix would be to wait (with some limits) until the
3657 # connection has been made and drbd transitions from WFConnection
3658 # into any other network-connected state (Connected, SyncTarget,
3661 # 1st pass, assemble on all nodes in secondary mode
3662 for inst_disk in instance.disks:
3663 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3665 node_disk = node_disk.Copy()
3666 node_disk.UnsetSize()
3667 lu.cfg.SetDiskID(node_disk, node)
3668 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3669 msg = result.fail_msg
3671 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3672 " (is_primary=False, pass=1): %s",
3673 inst_disk.iv_name, node, msg)
3674 if not ignore_secondaries:
3677 # FIXME: race condition on drbd migration to primary
3679 # 2nd pass, do only the primary node
3680 for inst_disk in instance.disks:
3683 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3684 if node != instance.primary_node:
3687 node_disk = node_disk.Copy()
3688 node_disk.UnsetSize()
3689 lu.cfg.SetDiskID(node_disk, node)
3690 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3691 msg = result.fail_msg
3693 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3694 " (is_primary=True, pass=2): %s",
3695 inst_disk.iv_name, node, msg)
3698 dev_path = result.payload
3700 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3702 # leave the disks configured for the primary node
3703 # this is a workaround that would be fixed better by
3704 # improving the logical/physical id handling
3705 for disk in instance.disks:
3706 lu.cfg.SetDiskID(disk, instance.primary_node)
3708 return disks_ok, device_info
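# Illustrative sketch (not part of the original module): the two-pass ordering
# used by _AssembleInstanceDisks, reduced to its scheduling decision. Given
# the nodes of one disk, pass 1 assembles everywhere in secondary mode and
# pass 2 touches only the primary node. All names are hypothetical.
def _ExampleAssemblyPlan(disk_nodes, primary_node):
  """Return the (node, as_primary) calls in the order they would be made."""
  plan = [(node, False) for node in disk_nodes]
  plan.extend((node, True) for node in disk_nodes if node == primary_node)
  return plan

# e.g. _ExampleAssemblyPlan(["node2", "node1"], "node1")
# -> [("node2", False), ("node1", False), ("node1", True)]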
3711 def _StartInstanceDisks(lu, instance, force):
3712 """Start the disks of an instance.
3715 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3716 ignore_secondaries=force)
3718 _ShutdownInstanceDisks(lu, instance)
3719 if force is not None and not force:
3720 lu.proc.LogWarning("", hint="If the message above refers to a"
3722 " you can retry the operation using '--force'.")
3723 raise errors.OpExecError("Disk consistency error")
3726 class LUDeactivateInstanceDisks(NoHooksLU):
3727 """Shutdown an instance's disks.
3730 _OP_REQP = ["instance_name"]
3733 def ExpandNames(self):
3734 self._ExpandAndLockInstance()
3735 self.needed_locks[locking.LEVEL_NODE] = []
3736 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3738 def DeclareLocks(self, level):
3739 if level == locking.LEVEL_NODE:
3740 self._LockInstancesNodes()
3742 def CheckPrereq(self):
3743 """Check prerequisites.
3745 This checks that the instance is in the cluster.
3748 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3749 assert self.instance is not None, \
3750 "Cannot retrieve locked instance %s" % self.op.instance_name
3752 def Exec(self, feedback_fn):
3753 """Deactivate the disks
3756 instance = self.instance
3757 _SafeShutdownInstanceDisks(self, instance)
3760 def _SafeShutdownInstanceDisks(lu, instance):
3761 """Shutdown block devices of an instance.
3763 This function checks if an instance is running, before calling
3764 _ShutdownInstanceDisks.
3767 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
3768 _ShutdownInstanceDisks(lu, instance)
3771 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3772 """Shutdown block devices of an instance.
3774 This does the shutdown on all nodes of the instance.
3776 If ignore_primary is false, errors on the primary node are
3781 for disk in instance.disks:
3782 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3783 lu.cfg.SetDiskID(top_disk, node)
3784 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3785 msg = result.fail_msg
3787 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3788 disk.iv_name, node, msg)
3789 if not ignore_primary or node != instance.primary_node:
3794 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3795 """Checks if a node has enough free memory.
3797 This function checks if a given node has the needed amount of free
3798 memory. In case the node has less memory or we cannot get the
3799 information from the node, this function raises an OpPrereqError
3802 @type lu: C{LogicalUnit}
3803 @param lu: a logical unit from which we get configuration data
3805 @param node: the node to check
3806 @type reason: C{str}
3807 @param reason: string to use in the error message
3808 @type requested: C{int}
3809 @param requested: the amount of memory in MiB to check for
3810 @type hypervisor_name: C{str}
3811 @param hypervisor_name: the hypervisor to ask for memory stats
3812 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3813 we cannot check the node
3816 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3817 nodeinfo[node].Raise("Can't get data from node %s" % node,
3818 prereq=True, ecode=errors.ECODE_ENVIRON)
3819 free_mem = nodeinfo[node].payload.get('memory_free', None)
3820 if not isinstance(free_mem, int):
3821 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3822 " was '%s'" % (node, free_mem),
3823 errors.ECODE_ENVIRON)
3824 if requested > free_mem:
3825 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3826 " needed %s MiB, available %s MiB" %
3827 (node, reason, requested, free_mem),
3831 def _CheckNodesFreeDisk(lu, nodenames, requested):
3832 """Checks if nodes have enough free disk space in the default VG.
3834 This function checks if all given nodes have the needed amount of
3835 free disk. In case any node has less disk or we cannot get the
3836 information from the node, this function raises an OpPrereqError
3839 @type lu: C{LogicalUnit}
3840 @param lu: a logical unit from which we get configuration data
3841 @type nodenames: C{list}
3842 @param nodenames: the list of node names to check
3843 @type requested: C{int}
3844 @param requested: the amount of disk in MiB to check for
3845 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
3846 we cannot check the node
3849 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3850 lu.cfg.GetHypervisorType())
3851 for node in nodenames:
3852 info = nodeinfo[node]
3853 info.Raise("Cannot get current information from node %s" % node,
3854 prereq=True, ecode=errors.ECODE_ENVIRON)
3855 vg_free = info.payload.get("vg_free", None)
3856 if not isinstance(vg_free, int):
3857 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
3858 " result was '%s'" % (node, vg_free),
3859 errors.ECODE_ENVIRON)
3860 if requested > vg_free:
3861 raise errors.OpPrereqError("Not enough disk space on target node %s:"
3862 " required %d MiB, available %d MiB" %
3863 (node, requested, vg_free),
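# Usage sketch (illustrative; the 2048 MiB requirement is a made-up figure,
# normally derived via _ComputeDiskSize below):
#   _CheckNodesFreeDisk(self, [pnode.name, snode.name], 2048)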
3867 class LUStartupInstance(LogicalUnit):
3868 """Starts an instance.
3871 HPATH = "instance-start"
3872 HTYPE = constants.HTYPE_INSTANCE
3873 _OP_REQP = ["instance_name", "force"]
3876 def ExpandNames(self):
3877 self._ExpandAndLockInstance()
3879 def BuildHooksEnv(self):
3882 This runs on master, primary and secondary nodes of the instance.
3886 "FORCE": self.op.force,
3888 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3889 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3892 def CheckPrereq(self):
3893 """Check prerequisites.
3895 This checks that the instance is in the cluster.
3898 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3899 assert self.instance is not None, \
3900 "Cannot retrieve locked instance %s" % self.op.instance_name
3903 self.beparams = getattr(self.op, "beparams", {})
3905 if not isinstance(self.beparams, dict):
3906 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3907 " dict" % (type(self.beparams), ),
3909 # fill the beparams dict
3910 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3911 self.op.beparams = self.beparams
3914 self.hvparams = getattr(self.op, "hvparams", {})
3916 if not isinstance(self.hvparams, dict):
3917 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3918 " dict" % (type(self.hvparams), ),
3921 # check hypervisor parameter syntax (locally)
3922 cluster = self.cfg.GetClusterInfo()
3923 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3924 filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3926 filled_hvp.update(self.hvparams)
3927 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3928 hv_type.CheckParameterSyntax(filled_hvp)
3929 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3930 self.op.hvparams = self.hvparams
3932 _CheckNodeOnline(self, instance.primary_node)
3934 bep = self.cfg.GetClusterInfo().FillBE(instance)
3935 # check bridges existence
3936 _CheckInstanceBridgesExist(self, instance)
3938 remote_info = self.rpc.call_instance_info(instance.primary_node,
3940 instance.hypervisor)
3941 remote_info.Raise("Error checking node %s" % instance.primary_node,
3942 prereq=True, ecode=errors.ECODE_ENVIRON)
3943 if not remote_info.payload: # not running already
3944 _CheckNodeFreeMemory(self, instance.primary_node,
3945 "starting instance %s" % instance.name,
3946 bep[constants.BE_MEMORY], instance.hypervisor)
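# Parameter-override sketch (hypothetical value): hvparams/beparams given on
# the opcode are one-off overrides for this start only; they are type-checked
# and merged on top of the cluster and instance defaults above, e.g.
#   self.op.beparams = {constants.BE_MEMORY: 512}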
3948 def Exec(self, feedback_fn):
3949 """Start the instance.
3952 instance = self.instance
3953 force = self.op.force
3955 self.cfg.MarkInstanceUp(instance.name)
3957 node_current = instance.primary_node
3959 _StartInstanceDisks(self, instance, force)
3961 result = self.rpc.call_instance_start(node_current, instance,
3962 self.hvparams, self.beparams)
3963 msg = result.fail_msg
3965 _ShutdownInstanceDisks(self, instance)
3966 raise errors.OpExecError("Could not start instance: %s" % msg)
3969 class LURebootInstance(LogicalUnit):
3970 """Reboot an instance.
3973 HPATH = "instance-reboot"
3974 HTYPE = constants.HTYPE_INSTANCE
3975 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3978 def CheckArguments(self):
3979 """Check the arguments.
3982 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
3983 constants.DEFAULT_SHUTDOWN_TIMEOUT)
3985 def ExpandNames(self):
3986 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3987 constants.INSTANCE_REBOOT_HARD,
3988 constants.INSTANCE_REBOOT_FULL]:
3989 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3990 (constants.INSTANCE_REBOOT_SOFT,
3991 constants.INSTANCE_REBOOT_HARD,
3992 constants.INSTANCE_REBOOT_FULL))
3993 self._ExpandAndLockInstance()
3995 def BuildHooksEnv(self):
3998 This runs on master, primary and secondary nodes of the instance.
4002 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4003 "REBOOT_TYPE": self.op.reboot_type,
4004 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4006 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4007 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4010 def CheckPrereq(self):
4011 """Check prerequisites.
4013 This checks that the instance is in the cluster.
4016 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4017 assert self.instance is not None, \
4018 "Cannot retrieve locked instance %s" % self.op.instance_name
4020 _CheckNodeOnline(self, instance.primary_node)
4022 # check bridges existence
4023 _CheckInstanceBridgesExist(self, instance)
4025 def Exec(self, feedback_fn):
4026 """Reboot the instance.
4029 instance = self.instance
4030 ignore_secondaries = self.op.ignore_secondaries
4031 reboot_type = self.op.reboot_type
4033 node_current = instance.primary_node
4035 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4036 constants.INSTANCE_REBOOT_HARD]:
4037 for disk in instance.disks:
4038 self.cfg.SetDiskID(disk, node_current)
4039 result = self.rpc.call_instance_reboot(node_current, instance,
4041 self.shutdown_timeout)
4042 result.Raise("Could not reboot instance")
4044 result = self.rpc.call_instance_shutdown(node_current, instance,
4045 self.shutdown_timeout)
4046 result.Raise("Could not shutdown instance for full reboot")
4047 _ShutdownInstanceDisks(self, instance)
4048 _StartInstanceDisks(self, instance, ignore_secondaries)
4049 result = self.rpc.call_instance_start(node_current, instance, None, None)
4050 msg = result.fail_msg
4052 _ShutdownInstanceDisks(self, instance)
4053 raise errors.OpExecError("Could not start instance for"
4054 " full reboot: %s" % msg)
4056 self.cfg.MarkInstanceUp(instance.name)
4059 class LUShutdownInstance(LogicalUnit):
4060 """Shutdown an instance.
4063 HPATH = "instance-stop"
4064 HTYPE = constants.HTYPE_INSTANCE
4065 _OP_REQP = ["instance_name"]
4068 def CheckArguments(self):
4069 """Check the arguments.
4072 self.timeout = getattr(self.op, "timeout",
4073 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4075 def ExpandNames(self):
4076 self._ExpandAndLockInstance()
4078 def BuildHooksEnv(self):
4081 This runs on master, primary and secondary nodes of the instance.
4084 env = _BuildInstanceHookEnvByObject(self, self.instance)
4085 env["TIMEOUT"] = self.timeout
4086 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4089 def CheckPrereq(self):
4090 """Check prerequisites.
4092 This checks that the instance is in the cluster.
4095 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4096 assert self.instance is not None, \
4097 "Cannot retrieve locked instance %s" % self.op.instance_name
4098 _CheckNodeOnline(self, self.instance.primary_node)
4100 def Exec(self, feedback_fn):
4101 """Shutdown the instance.
4104 instance = self.instance
4105 node_current = instance.primary_node
4106 timeout = self.timeout
4107 self.cfg.MarkInstanceDown(instance.name)
4108 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4109 msg = result.fail_msg
4111 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4113 _ShutdownInstanceDisks(self, instance)
4116 class LUReinstallInstance(LogicalUnit):
4117 """Reinstall an instance.
4120 HPATH = "instance-reinstall"
4121 HTYPE = constants.HTYPE_INSTANCE
4122 _OP_REQP = ["instance_name"]
4125 def ExpandNames(self):
4126 self._ExpandAndLockInstance()
4128 def BuildHooksEnv(self):
4131 This runs on master, primary and secondary nodes of the instance.
4134 env = _BuildInstanceHookEnvByObject(self, self.instance)
4135 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4138 def CheckPrereq(self):
4139 """Check prerequisites.
4141 This checks that the instance is in the cluster and is not running.
4144 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4145 assert instance is not None, \
4146 "Cannot retrieve locked instance %s" % self.op.instance_name
4147 _CheckNodeOnline(self, instance.primary_node)
4149 if instance.disk_template == constants.DT_DISKLESS:
4150 raise errors.OpPrereqError("Instance '%s' has no disks" %
4151 self.op.instance_name,
4153 _CheckInstanceDown(self, instance, "cannot reinstall")
4155 self.op.os_type = getattr(self.op, "os_type", None)
4156 self.op.force_variant = getattr(self.op, "force_variant", False)
4157 if self.op.os_type is not None:
4159 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4160 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4162 self.instance = instance
4164 def Exec(self, feedback_fn):
4165 """Reinstall the instance.
4168 inst = self.instance
4170 if self.op.os_type is not None:
4171 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4172 inst.os = self.op.os_type
4173 self.cfg.Update(inst, feedback_fn)
4175 _StartInstanceDisks(self, inst, None)
4177 feedback_fn("Running the instance OS create scripts...")
4178 # FIXME: pass debug option from opcode to backend
4179 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4180 self.op.debug_level)
4181 result.Raise("Could not install OS for instance %s on node %s" %
4182 (inst.name, inst.primary_node))
4184 _ShutdownInstanceDisks(self, inst)
4187 class LURecreateInstanceDisks(LogicalUnit):
4188 """Recreate an instance's missing disks.
4191 HPATH = "instance-recreate-disks"
4192 HTYPE = constants.HTYPE_INSTANCE
4193 _OP_REQP = ["instance_name", "disks"]
4196 def CheckArguments(self):
4197 """Check the arguments.
4200 if not isinstance(self.op.disks, list):
4201 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4202 for item in self.op.disks:
4203 if (not isinstance(item, int) or
4205 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4206 str(item), errors.ECODE_INVAL)
4208 def ExpandNames(self):
4209 self._ExpandAndLockInstance()
4211 def BuildHooksEnv(self):
4214 This runs on master, primary and secondary nodes of the instance.
4217 env = _BuildInstanceHookEnvByObject(self, self.instance)
4218 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4221 def CheckPrereq(self):
4222 """Check prerequisites.
4224 This checks that the instance is in the cluster and is not running.
4227 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4228 assert instance is not None, \
4229 "Cannot retrieve locked instance %s" % self.op.instance_name
4230 _CheckNodeOnline(self, instance.primary_node)
4232 if instance.disk_template == constants.DT_DISKLESS:
4233 raise errors.OpPrereqError("Instance '%s' has no disks" %
4234 self.op.instance_name, errors.ECODE_INVAL)
4235 _CheckInstanceDown(self, instance, "cannot recreate disks")
4237 if not self.op.disks:
4238 self.op.disks = range(len(instance.disks))
4240 for idx in self.op.disks:
4241 if idx >= len(instance.disks):
4242 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4245 self.instance = instance
4247 def Exec(self, feedback_fn):
4248 """Recreate the disks.
4252 for idx, _ in enumerate(self.instance.disks):
4253 if idx not in self.op.disks: # disk idx has not been passed in
4257 _CreateDisks(self, self.instance, to_skip=to_skip)
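# Parameter sketch (illustrative): for a two-disk instance, passing
#   self.op.disks = [1]
# recreates only disk/1, while an empty list (the default applied in
# CheckPrereq) expands to range(len(instance.disks)), i.e. all disks.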
4260 class LURenameInstance(LogicalUnit):
4261 """Rename an instance.
4264 HPATH = "instance-rename"
4265 HTYPE = constants.HTYPE_INSTANCE
4266 _OP_REQP = ["instance_name", "new_name"]
4268 def BuildHooksEnv(self):
4271 This runs on master, primary and secondary nodes of the instance.
4274 env = _BuildInstanceHookEnvByObject(self, self.instance)
4275 env["INSTANCE_NEW_NAME"] = self.op.new_name
4276 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4279 def CheckPrereq(self):
4280 """Check prerequisites.
4282 This checks that the instance is in the cluster and is not running.
4285 self.op.instance_name = _ExpandInstanceName(self.cfg,
4286 self.op.instance_name)
4287 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4288 assert instance is not None
4289 _CheckNodeOnline(self, instance.primary_node)
4290 _CheckInstanceDown(self, instance, "cannot rename")
4291 self.instance = instance
4293 # new name verification
4294 name_info = utils.GetHostInfo(self.op.new_name)
4296 self.op.new_name = new_name = name_info.name
4297 instance_list = self.cfg.GetInstanceList()
4298 if new_name in instance_list:
4299 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4300 new_name, errors.ECODE_EXISTS)
4302 if not getattr(self.op, "ignore_ip", False):
4303 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4304 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4305 (name_info.ip, new_name),
4306 errors.ECODE_NOTUNIQUE)
4309 def Exec(self, feedback_fn):
4310 """Reinstall the instance.
4313 inst = self.instance
4314 old_name = inst.name
4316 if inst.disk_template == constants.DT_FILE:
4317 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4319 self.cfg.RenameInstance(inst.name, self.op.new_name)
4320 # Change the instance lock. This is definitely safe while we hold the BGL
4321 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4322 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4324 # re-read the instance from the configuration after rename
4325 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4327 if inst.disk_template == constants.DT_FILE:
4328 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4329 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4330 old_file_storage_dir,
4331 new_file_storage_dir)
4332 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4333 " (but the instance has been renamed in Ganeti)" %
4334 (inst.primary_node, old_file_storage_dir,
4335 new_file_storage_dir))
4337 _StartInstanceDisks(self, inst, None)
4339 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4340 old_name, self.op.debug_level)
4341 msg = result.fail_msg
4343 msg = ("Could not run OS rename script for instance %s on node %s"
4344 " (but the instance has been renamed in Ganeti): %s" %
4345 (inst.name, inst.primary_node, msg))
4346 self.proc.LogWarning(msg)
4348 _ShutdownInstanceDisks(self, inst)
4351 class LURemoveInstance(LogicalUnit):
4352 """Remove an instance.
4355 HPATH = "instance-remove"
4356 HTYPE = constants.HTYPE_INSTANCE
4357 _OP_REQP = ["instance_name", "ignore_failures"]
4360 def CheckArguments(self):
4361 """Check the arguments.
4364 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4365 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4367 def ExpandNames(self):
4368 self._ExpandAndLockInstance()
4369 self.needed_locks[locking.LEVEL_NODE] = []
4370 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4372 def DeclareLocks(self, level):
4373 if level == locking.LEVEL_NODE:
4374 self._LockInstancesNodes()
4376 def BuildHooksEnv(self):
4379 This runs on master, primary and secondary nodes of the instance.
4382 env = _BuildInstanceHookEnvByObject(self, self.instance)
4383 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4384 nl = [self.cfg.GetMasterNode()]
4385 nl_post = list(self.instance.all_nodes) + nl
4386 return env, nl, nl_post
4388 def CheckPrereq(self):
4389 """Check prerequisites.
4391 This checks that the instance is in the cluster.
4394 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4395 assert self.instance is not None, \
4396 "Cannot retrieve locked instance %s" % self.op.instance_name
4398 def Exec(self, feedback_fn):
4399 """Remove the instance.
4402 instance = self.instance
4403 logging.info("Shutting down instance %s on node %s",
4404 instance.name, instance.primary_node)
4406 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4407 self.shutdown_timeout)
4408 msg = result.fail_msg
4410 if self.op.ignore_failures:
4411 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4413 raise errors.OpExecError("Could not shutdown instance %s on"
4415 (instance.name, instance.primary_node, msg))
4417 logging.info("Removing block devices for instance %s", instance.name)
4419 if not _RemoveDisks(self, instance):
4420 if self.op.ignore_failures:
4421 feedback_fn("Warning: can't remove instance's disks")
4423 raise errors.OpExecError("Can't remove instance's disks")
4425 logging.info("Removing instance %s out of cluster config", instance.name)
4427 self.cfg.RemoveInstance(instance.name)
4428 self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4431 class LUQueryInstances(NoHooksLU):
4432 """Logical unit for querying instances.
4435 # pylint: disable-msg=W0142
4436 _OP_REQP = ["output_fields", "names", "use_locking"]
4438 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4439 "serial_no", "ctime", "mtime", "uuid"]
4440 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4442 "disk_template", "ip", "mac", "bridge",
4443 "nic_mode", "nic_link",
4444 "sda_size", "sdb_size", "vcpus", "tags",
4445 "network_port", "beparams",
4446 r"(disk)\.(size)/([0-9]+)",
4447 r"(disk)\.(sizes)", "disk_usage",
4448 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4449 r"(nic)\.(bridge)/([0-9]+)",
4450 r"(nic)\.(macs|ips|modes|links|bridges)",
4451 r"(disk|nic)\.(count)",
4453 ] + _SIMPLE_FIELDS +
4455 for name in constants.HVS_PARAMETERS
4456 if name not in constants.HVC_GLOBALS] +
4458 for name in constants.BES_PARAMETERS])
4459 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
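# Field-selection sketch (illustrative): a query such as
#   output_fields = ["name", "pnode", "oper_state", "disk.sizes", "nic.macs"]
# mixes purely static fields (answered from the configuration) with the
# dynamic "oper_state", which forces the node query in Exec below.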
4462 def ExpandNames(self):
4463 _CheckOutputFields(static=self._FIELDS_STATIC,
4464 dynamic=self._FIELDS_DYNAMIC,
4465 selected=self.op.output_fields)
4467 self.needed_locks = {}
4468 self.share_locks[locking.LEVEL_INSTANCE] = 1
4469 self.share_locks[locking.LEVEL_NODE] = 1
4472 self.wanted = _GetWantedInstances(self, self.op.names)
4474 self.wanted = locking.ALL_SET
4476 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4477 self.do_locking = self.do_node_query and self.op.use_locking
4479 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4480 self.needed_locks[locking.LEVEL_NODE] = []
4481 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4483 def DeclareLocks(self, level):
4484 if level == locking.LEVEL_NODE and self.do_locking:
4485 self._LockInstancesNodes()
4487 def CheckPrereq(self):
4488 """Check prerequisites.
4493 def Exec(self, feedback_fn):
4494 """Computes the list of nodes and their attributes.
4497 # pylint: disable-msg=R0912
4498 # way too many branches here
4499 all_info = self.cfg.GetAllInstancesInfo()
4500 if self.wanted == locking.ALL_SET:
4501 # caller didn't specify instance names, so ordering is not important
4503 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4505 instance_names = all_info.keys()
4506 instance_names = utils.NiceSort(instance_names)
4508 # caller did specify names, so we must keep the ordering
4510 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4512 tgt_set = all_info.keys()
4513 missing = set(self.wanted).difference(tgt_set)
4515 raise errors.OpExecError("Some instances were removed before"
4516 " retrieving their data: %s" % missing)
4517 instance_names = self.wanted
4519 instance_list = [all_info[iname] for iname in instance_names]
4521 # begin data gathering
4523 nodes = frozenset([inst.primary_node for inst in instance_list])
4524 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4528 if self.do_node_query:
4530 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4532 result = node_data[name]
4534 # offline nodes will be in both lists
4535 off_nodes.append(name)
4537 bad_nodes.append(name)
4540 live_data.update(result.payload)
4541 # else no instance is alive
4543 live_data = dict([(name, {}) for name in instance_names])
4545 # end data gathering
4550 cluster = self.cfg.GetClusterInfo()
4551 for instance in instance_list:
4553 i_hv = cluster.FillHV(instance, skip_globals=True)
4554 i_be = cluster.FillBE(instance)
4555 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4556 nic.nicparams) for nic in instance.nics]
4557 for field in self.op.output_fields:
4558 st_match = self._FIELDS_STATIC.Matches(field)
4559 if field in self._SIMPLE_FIELDS:
4560 val = getattr(instance, field)
4561 elif field == "pnode":
4562 val = instance.primary_node
4563 elif field == "snodes":
4564 val = list(instance.secondary_nodes)
4565 elif field == "admin_state":
4566 val = instance.admin_up
4567 elif field == "oper_state":
4568 if instance.primary_node in bad_nodes:
4571 val = bool(live_data.get(instance.name))
4572 elif field == "status":
4573 if instance.primary_node in off_nodes:
4574 val = "ERROR_nodeoffline"
4575 elif instance.primary_node in bad_nodes:
4576 val = "ERROR_nodedown"
4578 running = bool(live_data.get(instance.name))
4580 if instance.admin_up:
4585 if instance.admin_up:
4589 elif field == "oper_ram":
4590 if instance.primary_node in bad_nodes:
4592 elif instance.name in live_data:
4593 val = live_data[instance.name].get("memory", "?")
4596 elif field == "vcpus":
4597 val = i_be[constants.BE_VCPUS]
4598 elif field == "disk_template":
4599 val = instance.disk_template
4602 val = instance.nics[0].ip
4605 elif field == "nic_mode":
4607 val = i_nicp[0][constants.NIC_MODE]
4610 elif field == "nic_link":
4612 val = i_nicp[0][constants.NIC_LINK]
4615 elif field == "bridge":
4616 if (instance.nics and
4617 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4618 val = i_nicp[0][constants.NIC_LINK]
4621 elif field == "mac":
4623 val = instance.nics[0].mac
4626 elif field in ("sda_size", "sdb_size"):
4627 idx = ord(field[2]) - ord('a')
4629 val = instance.FindDisk(idx).size
4630 except errors.OpPrereqError:
4632 elif field == "disk_usage": # total disk usage per node
4633 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4634 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4635 elif field == "tags":
4636 val = list(instance.GetTags())
4637 elif field == "hvparams":
4639 elif (field.startswith(HVPREFIX) and
4640 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4641 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4642 val = i_hv.get(field[len(HVPREFIX):], None)
4643 elif field == "beparams":
4645 elif (field.startswith(BEPREFIX) and
4646 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4647 val = i_be.get(field[len(BEPREFIX):], None)
4648 elif st_match and st_match.groups():
4649 # matches a variable list
4650 st_groups = st_match.groups()
4651 if st_groups and st_groups[0] == "disk":
4652 if st_groups[1] == "count":
4653 val = len(instance.disks)
4654 elif st_groups[1] == "sizes":
4655 val = [disk.size for disk in instance.disks]
4656 elif st_groups[1] == "size":
4658 val = instance.FindDisk(st_groups[2]).size
4659 except errors.OpPrereqError:
4662 assert False, "Unhandled disk parameter"
4663 elif st_groups[0] == "nic":
4664 if st_groups[1] == "count":
4665 val = len(instance.nics)
4666 elif st_groups[1] == "macs":
4667 val = [nic.mac for nic in instance.nics]
4668 elif st_groups[1] == "ips":
4669 val = [nic.ip for nic in instance.nics]
4670 elif st_groups[1] == "modes":
4671 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4672 elif st_groups[1] == "links":
4673 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4674 elif st_groups[1] == "bridges":
4677 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4678 val.append(nicp[constants.NIC_LINK])
4683 nic_idx = int(st_groups[2])
4684 if nic_idx >= len(instance.nics):
4687 if st_groups[1] == "mac":
4688 val = instance.nics[nic_idx].mac
4689 elif st_groups[1] == "ip":
4690 val = instance.nics[nic_idx].ip
4691 elif st_groups[1] == "mode":
4692 val = i_nicp[nic_idx][constants.NIC_MODE]
4693 elif st_groups[1] == "link":
4694 val = i_nicp[nic_idx][constants.NIC_LINK]
4695 elif st_groups[1] == "bridge":
4696 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4697 if nic_mode == constants.NIC_MODE_BRIDGED:
4698 val = i_nicp[nic_idx][constants.NIC_LINK]
4702 assert False, "Unhandled NIC parameter"
4704 assert False, ("Declared but unhandled variable parameter '%s'" %
4707 assert False, "Declared but unhandled parameter '%s'" % field
4714 class LUFailoverInstance(LogicalUnit):
4715 """Failover an instance.
4718 HPATH = "instance-failover"
4719 HTYPE = constants.HTYPE_INSTANCE
4720 _OP_REQP = ["instance_name", "ignore_consistency"]
4723 def CheckArguments(self):
4724 """Check the arguments.
4727 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4728 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4730 def ExpandNames(self):
4731 self._ExpandAndLockInstance()
4732 self.needed_locks[locking.LEVEL_NODE] = []
4733 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4735 def DeclareLocks(self, level):
4736 if level == locking.LEVEL_NODE:
4737 self._LockInstancesNodes()
4739 def BuildHooksEnv(self):
4742 This runs on master, primary and secondary nodes of the instance.
4745 instance = self.instance
4746 source_node = instance.primary_node
4747 target_node = instance.secondary_nodes[0]
4749 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4750 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4751 "OLD_PRIMARY": source_node,
4752 "OLD_SECONDARY": target_node,
4753 "NEW_PRIMARY": target_node,
4754 "NEW_SECONDARY": source_node,
4756 env.update(_BuildInstanceHookEnvByObject(self, instance))
4757 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4759 nl_post.append(source_node)
4760 return env, nl, nl_post
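# Hook-environment sketch (hypothetical node names): for an instance whose
# primary is node1 and secondary is node2, hooks receive OLD_PRIMARY=node1,
# OLD_SECONDARY=node2, NEW_PRIMARY=node2, NEW_SECONDARY=node1, plus the
# generic instance variables from _BuildInstanceHookEnvByObject.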
4762 def CheckPrereq(self):
4763 """Check prerequisites.
4765 This checks that the instance is in the cluster.
4768 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4769 assert self.instance is not None, \
4770 "Cannot retrieve locked instance %s" % self.op.instance_name
4772 bep = self.cfg.GetClusterInfo().FillBE(instance)
4773 if instance.disk_template not in constants.DTS_NET_MIRROR:
4774 raise errors.OpPrereqError("Instance's disk layout is not"
4775 " network mirrored, cannot failover.",
4778 secondary_nodes = instance.secondary_nodes
4779 if not secondary_nodes:
4780 raise errors.ProgrammerError("no secondary node but using "
4781 "a mirrored disk template")
4783 target_node = secondary_nodes[0]
4784 _CheckNodeOnline(self, target_node)
4785 _CheckNodeNotDrained(self, target_node)
4786 if instance.admin_up:
4787 # check memory requirements on the secondary node
4788 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4789 instance.name, bep[constants.BE_MEMORY],
4790 instance.hypervisor)
4792 self.LogInfo("Not checking memory on the secondary node as"
4793 " instance will not be started")
4795 # check bridge existence
4796 _CheckInstanceBridgesExist(self, instance, node=target_node)
4798 def Exec(self, feedback_fn):
4799 """Failover an instance.
4801 The failover is done by shutting it down on its present node and
4802 starting it on the secondary.
4805 instance = self.instance
4807 source_node = instance.primary_node
4808 target_node = instance.secondary_nodes[0]
4810 if instance.admin_up:
4811 feedback_fn("* checking disk consistency between source and target")
4812 for dev in instance.disks:
4813 # for drbd, these are drbd over lvm
4814 if not _CheckDiskConsistency(self, dev, target_node, False):
4815 if not self.op.ignore_consistency:
4816 raise errors.OpExecError("Disk %s is degraded on target node,"
4817 " aborting failover." % dev.iv_name)
4819 feedback_fn("* not checking disk consistency as instance is not running")
4821 feedback_fn("* shutting down instance on source node")
4822 logging.info("Shutting down instance %s on node %s",
4823 instance.name, source_node)
4825 result = self.rpc.call_instance_shutdown(source_node, instance,
4826 self.shutdown_timeout)
4827 msg = result.fail_msg
4829 if self.op.ignore_consistency:
4830 self.proc.LogWarning("Could not shutdown instance %s on node %s."
4831 " Proceeding anyway. Please make sure node"
4832 " %s is down. Error details: %s",
4833 instance.name, source_node, source_node, msg)
4835 raise errors.OpExecError("Could not shutdown instance %s on"
4837 (instance.name, source_node, msg))
4839 feedback_fn("* deactivating the instance's disks on source node")
4840 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4841 raise errors.OpExecError("Can't shut down the instance's disks.")
4843 instance.primary_node = target_node
4844 # distribute new instance config to the other nodes
4845 self.cfg.Update(instance, feedback_fn)
4847 # Only start the instance if it's marked as up
4848 if instance.admin_up:
4849 feedback_fn("* activating the instance's disks on target node")
4850 logging.info("Starting instance %s on node %s",
4851 instance.name, target_node)
4853 disks_ok, _ = _AssembleInstanceDisks(self, instance,
4854 ignore_secondaries=True)
4856 _ShutdownInstanceDisks(self, instance)
4857 raise errors.OpExecError("Can't activate the instance's disks")
4859 feedback_fn("* starting the instance on the target node")
4860 result = self.rpc.call_instance_start(target_node, instance, None, None)
4861 msg = result.fail_msg
4863 _ShutdownInstanceDisks(self, instance)
4864 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4865 (instance.name, target_node, msg))
4868 class LUMigrateInstance(LogicalUnit):
4869 """Migrate an instance.
4871 This is migration without shutting the instance down, as opposed to
4872 failover, which requires a shutdown.
4875 HPATH = "instance-migrate"
4876 HTYPE = constants.HTYPE_INSTANCE
4877 _OP_REQP = ["instance_name", "live", "cleanup"]
4881 def ExpandNames(self):
4882 self._ExpandAndLockInstance()
4884 self.needed_locks[locking.LEVEL_NODE] = []
4885 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4887 self._migrater = TLMigrateInstance(self, self.op.instance_name,
4888 self.op.live, self.op.cleanup)
4889 self.tasklets = [self._migrater]
4891 def DeclareLocks(self, level):
4892 if level == locking.LEVEL_NODE:
4893 self._LockInstancesNodes()
4895 def BuildHooksEnv(self):
4898 This runs on master, primary and secondary nodes of the instance.
4901 instance = self._migrater.instance
4902 source_node = instance.primary_node
4903 target_node = instance.secondary_nodes[0]
4904 env = _BuildInstanceHookEnvByObject(self, instance)
4905 env["MIGRATE_LIVE"] = self.op.live
4906 env["MIGRATE_CLEANUP"] = self.op.cleanup
4908 "OLD_PRIMARY": source_node,
4909 "OLD_SECONDARY": target_node,
4910 "NEW_PRIMARY": target_node,
4911 "NEW_SECONDARY": source_node,
4913 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4915 nl_post.append(source_node)
4916 return env, nl, nl_post
4919 class LUMoveInstance(LogicalUnit):
4920 """Move an instance by data-copying.
4923 HPATH = "instance-move"
4924 HTYPE = constants.HTYPE_INSTANCE
4925 _OP_REQP = ["instance_name", "target_node"]
4928 def CheckArguments(self):
4929 """Check the arguments.
4932 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4933 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4935 def ExpandNames(self):
4936 self._ExpandAndLockInstance()
4937 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
4938 self.op.target_node = target_node
4939 self.needed_locks[locking.LEVEL_NODE] = [target_node]
4940 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4942 def DeclareLocks(self, level):
4943 if level == locking.LEVEL_NODE:
4944 self._LockInstancesNodes(primary_only=True)
4946 def BuildHooksEnv(self):
4949 This runs on master, primary and secondary nodes of the instance.
4953 "TARGET_NODE": self.op.target_node,
4954 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4956 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4957 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4958 self.op.target_node]
4961 def CheckPrereq(self):
4962 """Check prerequisites.
4964 This checks that the instance is in the cluster.
4967 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4968 assert self.instance is not None, \
4969 "Cannot retrieve locked instance %s" % self.op.instance_name
4971 node = self.cfg.GetNodeInfo(self.op.target_node)
4972 assert node is not None, \
4973 "Cannot retrieve locked node %s" % self.op.target_node
4975 self.target_node = target_node = node.name
4977 if target_node == instance.primary_node:
4978 raise errors.OpPrereqError("Instance %s is already on the node %s" %
4979 (instance.name, target_node),
4982 bep = self.cfg.GetClusterInfo().FillBE(instance)
4984 for idx, dsk in enumerate(instance.disks):
4985 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
4986 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
4987 " cannot copy" % idx, errors.ECODE_STATE)
4989 _CheckNodeOnline(self, target_node)
4990 _CheckNodeNotDrained(self, target_node)
4992 if instance.admin_up:
4993 # check memory requirements on the target node
4994 _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
4995 instance.name, bep[constants.BE_MEMORY],
4996 instance.hypervisor)
4998 self.LogInfo("Not checking memory on the secondary node as"
4999 " instance will not be started")
5001 # check bridge existence
5002 _CheckInstanceBridgesExist(self, instance, node=target_node)
5004 def Exec(self, feedback_fn):
5005 """Move an instance.
5007 The move is done by shutting it down on its present node, copying
5008 the data over (slow) and starting it on the new node.
5011 instance = self.instance
5013 source_node = instance.primary_node
5014 target_node = self.target_node
5016 self.LogInfo("Shutting down instance %s on source node %s",
5017 instance.name, source_node)
5019 result = self.rpc.call_instance_shutdown(source_node, instance,
5020 self.shutdown_timeout)
5021 msg = result.fail_msg
5023 if self.op.ignore_consistency:
5024 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5025 " Proceeding anyway. Please make sure node"
5026 " %s is down. Error details: %s",
5027 instance.name, source_node, source_node, msg)
5029 raise errors.OpExecError("Could not shutdown instance %s on"
5031 (instance.name, source_node, msg))
5033 # create the target disks
5035 _CreateDisks(self, instance, target_node=target_node)
5036 except errors.OpExecError:
5037 self.LogWarning("Device creation failed, reverting...")
5039 _RemoveDisks(self, instance, target_node=target_node)
5041 self.cfg.ReleaseDRBDMinors(instance.name)
5044 cluster_name = self.cfg.GetClusterInfo().cluster_name
5047 # activate, get path, copy the data over
5048 for idx, disk in enumerate(instance.disks):
5049 self.LogInfo("Copying data for disk %d", idx)
5050 result = self.rpc.call_blockdev_assemble(target_node, disk,
5051 instance.name, True)
5053 self.LogWarning("Can't assemble newly created disk %d: %s",
5054 idx, result.fail_msg)
5055 errs.append(result.fail_msg)
5057 dev_path = result.payload
5058 result = self.rpc.call_blockdev_export(source_node, disk,
5059 target_node, dev_path,
5062 self.LogWarning("Can't copy data over for disk %d: %s",
5063 idx, result.fail_msg)
5064 errs.append(result.fail_msg)
5068 self.LogWarning("Some disks failed to copy, aborting")
5070 _RemoveDisks(self, instance, target_node=target_node)
5072 self.cfg.ReleaseDRBDMinors(instance.name)
5073 raise errors.OpExecError("Errors during disk copy: %s" %
5076 instance.primary_node = target_node
5077 self.cfg.Update(instance, feedback_fn)
5079 self.LogInfo("Removing the disks on the original node")
5080 _RemoveDisks(self, instance, target_node=source_node)
5082 # Only start the instance if it's marked as up
5083 if instance.admin_up:
5084 self.LogInfo("Starting instance %s on node %s",
5085 instance.name, target_node)
5087 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5088 ignore_secondaries=True)
5090 _ShutdownInstanceDisks(self, instance)
5091 raise errors.OpExecError("Can't activate the instance's disks")
5093 result = self.rpc.call_instance_start(target_node, instance, None, None)
5094 msg = result.fail_msg
5096 _ShutdownInstanceDisks(self, instance)
5097 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5098 (instance.name, target_node, msg))
5101 class LUMigrateNode(LogicalUnit):
5102 """Migrate all instances from a node.
5105 HPATH = "node-migrate"
5106 HTYPE = constants.HTYPE_NODE
5107 _OP_REQP = ["node_name", "live"]
5110 def ExpandNames(self):
5111 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5113 self.needed_locks = {
5114 locking.LEVEL_NODE: [self.op.node_name],
5117 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5119 # Create tasklets for migrating instances for all instances on this node
5123 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5124 logging.debug("Migrating instance %s", inst.name)
5125 names.append(inst.name)
5127 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5129 self.tasklets = tasklets
5131 # Declare instance locks
5132 self.needed_locks[locking.LEVEL_INSTANCE] = names
5134 def DeclareLocks(self, level):
5135 if level == locking.LEVEL_NODE:
5136 self._LockInstancesNodes()
5138 def BuildHooksEnv(self):
5141 This runs on the master, the primary and all the secondaries.
5145 "NODE_NAME": self.op.node_name,
5148 nl = [self.cfg.GetMasterNode()]
5150 return (env, nl, nl)
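# Tasklet-construction sketch: both LUMigrateInstance and LUMigrateNode above
# delegate the actual work to the tasklet defined below, e.g.
#   self.tasklets = [TLMigrateInstance(self, inst.name, self.op.live, False)]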
5153 class TLMigrateInstance(Tasklet):
5154 def __init__(self, lu, instance_name, live, cleanup):
5155 """Initializes this class.
5158 Tasklet.__init__(self, lu)
5161 self.instance_name = instance_name
5163 self.cleanup = cleanup
5165 def CheckPrereq(self):
5166 """Check prerequisites.
5168 This checks that the instance is in the cluster.
5171 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5172 instance = self.cfg.GetInstanceInfo(instance_name)
5173 assert instance is not None
5175 if instance.disk_template != constants.DT_DRBD8:
5176 raise errors.OpPrereqError("Instance's disk layout is not"
5177 " drbd8, cannot migrate.", errors.ECODE_STATE)
5179 secondary_nodes = instance.secondary_nodes
5180 if not secondary_nodes:
5181 raise errors.ConfigurationError("No secondary node but using"
5182 " drbd8 disk template")
5184 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5186 target_node = secondary_nodes[0]
5187 # check memory requirements on the secondary node
5188 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5189 instance.name, i_be[constants.BE_MEMORY],
5190 instance.hypervisor)
5192 # check bridge existence
5193 _CheckInstanceBridgesExist(self, instance, node=target_node)
5195 if not self.cleanup:
5196 _CheckNodeNotDrained(self, target_node)
5197 result = self.rpc.call_instance_migratable(instance.primary_node,
5199 result.Raise("Can't migrate, please use failover",
5200 prereq=True, ecode=errors.ECODE_STATE)
5202 self.instance = instance
5204 def _WaitUntilSync(self):
5205 """Poll with custom rpc for disk sync.
5207 This uses our own step-based rpc call.
5210 self.feedback_fn("* wait until resync is done")
5214 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5216 self.instance.disks)
5218 for node, nres in result.items():
5219 nres.Raise("Cannot resync disks on node %s" % node)
5220 node_done, node_percent = nres.payload
5221 all_done = all_done and node_done
5222 if node_percent is not None:
5223 min_percent = min(min_percent, node_percent)
5225 if min_percent < 100:
5226 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5229 def _EnsureSecondary(self, node):
5230 """Demote a node to secondary.
5233 self.feedback_fn("* switching node %s to secondary mode" % node)
5235 for dev in self.instance.disks:
5236 self.cfg.SetDiskID(dev, node)
5238 result = self.rpc.call_blockdev_close(node, self.instance.name,
5239 self.instance.disks)
5240 result.Raise("Cannot change disk to secondary on node %s" % node)
5242 def _GoStandalone(self):
5243 """Disconnect from the network.
5246 self.feedback_fn("* changing into standalone mode")
5247 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5248 self.instance.disks)
5249 for node, nres in result.items():
5250 nres.Raise("Cannot disconnect disks node %s" % node)
5252 def _GoReconnect(self, multimaster):
5253 """Reconnect to the network.
5259 msg = "single-master"
5260 self.feedback_fn("* changing disks into %s mode" % msg)
5261 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5262 self.instance.disks,
5263 self.instance.name, multimaster)
5264 for node, nres in result.items():
5265 nres.Raise("Cannot change disks config on node %s" % node)
5267 def _ExecCleanup(self):
5268 """Try to cleanup after a failed migration.
5270 The cleanup is done by:
5271 - check that the instance is running only on one node
5272 (and update the config if needed)
5273 - change disks on its secondary node to secondary
5274 - wait until disks are fully synchronized
5275 - disconnect from the network
5276 - change disks into single-master mode
5277 - wait again until disks are fully synchronized
5280 instance = self.instance
5281 target_node = self.target_node
5282 source_node = self.source_node
5284 # check running on only one node
5285 self.feedback_fn("* checking where the instance actually runs"
5286 " (if this hangs, the hypervisor might be in"
5288 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5289 for node, result in ins_l.items():
5290 result.Raise("Can't contact node %s" % node)
5292 runningon_source = instance.name in ins_l[source_node].payload
5293 runningon_target = instance.name in ins_l[target_node].payload
5295 if runningon_source and runningon_target:
5296 raise errors.OpExecError("Instance seems to be running on two nodes,"
5297 " or the hypervisor is confused. You will have"
5298 " to ensure manually that it runs only on one"
5299 " and restart this operation.")
5301 if not (runningon_source or runningon_target):
5302 raise errors.OpExecError("Instance does not seem to be running at all."
5303 " In this case, it's safer to repair by"
5304 " running 'gnt-instance stop' to ensure disk"
5305 " shutdown, and then restarting it.")
5307 if runningon_target:
5308 # the migration has actually succeeded, we need to update the config
5309 self.feedback_fn("* instance running on secondary node (%s),"
5310 " updating config" % target_node)
5311 instance.primary_node = target_node
5312 self.cfg.Update(instance, self.feedback_fn)
5313 demoted_node = source_node
5315 self.feedback_fn("* instance confirmed to be running on its"
5316 " primary node (%s)" % source_node)
5317 demoted_node = target_node
5319 self._EnsureSecondary(demoted_node)
5321 self._WaitUntilSync()
5322 except errors.OpExecError:
5323 # we ignore here errors, since if the device is standalone, it
5324 # won't be able to sync
5326 self._GoStandalone()
5327 self._GoReconnect(False)
5328 self._WaitUntilSync()
5330 self.feedback_fn("* done")
5332 def _RevertDiskStatus(self):
5333 """Try to revert the disk status after a failed migration.
5336 target_node = self.target_node
5338 self._EnsureSecondary(target_node)
5339 self._GoStandalone()
5340 self._GoReconnect(False)
5341 self._WaitUntilSync()
5342 except errors.OpExecError, err:
5343 self.lu.LogWarning("Migration failed and I can't reconnect the"
5344 " drives: error '%s'\n"
5345 "Please look and recover the instance status" %
5348 def _AbortMigration(self):
5349 """Call the hypervisor code to abort a started migration.
5352 instance = self.instance
5353 target_node = self.target_node
5354 migration_info = self.migration_info
5356 abort_result = self.rpc.call_finalize_migration(target_node,
5360 abort_msg = abort_result.fail_msg
5362 logging.error("Aborting migration failed on target node %s: %s",
5363 target_node, abort_msg)
5364 # Don't raise an exception here, as we still have to try to revert the
5365 # disk status, even if this step failed.
5367 def _ExecMigration(self):
5368 """Migrate an instance.
5370 The migrate is done by:
5371 - change the disks into dual-master mode
5372 - wait until disks are fully synchronized again
5373 - migrate the instance
5374 - change disks on the new secondary node (the old primary) to secondary
5375 - wait until disks are fully synchronized
5376 - change disks into single-master mode
5379 instance = self.instance
5380 target_node = self.target_node
5381 source_node = self.source_node
5383 self.feedback_fn("* checking disk consistency between source and target")
5384 for dev in instance.disks:
5385 if not _CheckDiskConsistency(self, dev, target_node, False):
5386 raise errors.OpExecError("Disk %s is degraded or not fully"
5387 " synchronized on target node,"
5388 " aborting migrate." % dev.iv_name)
5390 # First get the migration information from the remote node
5391 result = self.rpc.call_migration_info(source_node, instance)
5392 msg = result.fail_msg
5394 log_err = ("Failed fetching source migration information from %s: %s" %
5396 logging.error(log_err)
5397 raise errors.OpExecError(log_err)
5399 self.migration_info = migration_info = result.payload
5401 # Then switch the disks to master/master mode
5402 self._EnsureSecondary(target_node)
5403 self._GoStandalone()
5404 self._GoReconnect(True)
5405 self._WaitUntilSync()
5407 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5408 result = self.rpc.call_accept_instance(target_node,
5411 self.nodes_ip[target_node])
5413 msg = result.fail_msg
5415 logging.error("Instance pre-migration failed, trying to revert"
5416 " disk status: %s", msg)
5417 self.feedback_fn("Pre-migration failed, aborting")
5418 self._AbortMigration()
5419 self._RevertDiskStatus()
5420 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5421 (instance.name, msg))
5423 self.feedback_fn("* migrating instance to %s" % target_node)
5425 result = self.rpc.call_instance_migrate(source_node, instance,
5426 self.nodes_ip[target_node],
5428 msg = result.fail_msg
5430 logging.error("Instance migration failed, trying to revert"
5431 " disk status: %s", msg)
5432 self.feedback_fn("Migration failed, aborting")
5433 self._AbortMigration()
5434 self._RevertDiskStatus()
5435 raise errors.OpExecError("Could not migrate instance %s: %s" %
5436 (instance.name, msg))
5439 instance.primary_node = target_node
5440 # distribute new instance config to the other nodes
5441 self.cfg.Update(instance, self.feedback_fn)
5443 result = self.rpc.call_finalize_migration(target_node,
5447 msg = result.fail_msg
5449 logging.error("Instance migration succeeded, but finalization failed:"
5451 raise errors.OpExecError("Could not finalize instance migration: %s" %
5454 self._EnsureSecondary(source_node)
5455 self._WaitUntilSync()
5456 self._GoStandalone()
5457 self._GoReconnect(False)
5458 self._WaitUntilSync()
5460 self.feedback_fn("* done")
5462 def Exec(self, feedback_fn):
5463 """Perform the migration.
5466 feedback_fn("Migrating instance %s" % self.instance.name)
5468 self.feedback_fn = feedback_fn
5470 self.source_node = self.instance.primary_node
5471 self.target_node = self.instance.secondary_nodes[0]
5472 self.all_nodes = [self.source_node, self.target_node]
5474 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5475 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5479 return self._ExecCleanup()
5481 return self._ExecMigration()
5484 def _CreateBlockDev(lu, node, instance, device, force_create,
5486 """Create a tree of block devices on a given node.
5488 If this device type has to be created on secondaries, create it and all its children.
5491 If not, just recurse to children keeping the same 'force' value.
5493 @param lu: the lu on whose behalf we execute
5494 @param node: the node on which to create the device
5495 @type instance: L{objects.Instance}
5496 @param instance: the instance which owns the device
5497 @type device: L{objects.Disk}
5498 @param device: the device to create
5499 @type force_create: boolean
5500 @param force_create: whether to force creation of this device; this
5501 will be changed to True whenever we find a device whose
5502 CreateOnSecondary() method returns True
5503 @param info: the extra 'metadata' we should attach to the device
5504 (this will be represented as a LVM tag)
5505 @type force_open: boolean
5506 @param force_open: this parameter will be passed to the
5507 L{backend.BlockdevCreate} function where it specifies
5508 whether we run on primary or not, and it affects both
5509 the child assembly and the device's own Open() execution
5512 if device.CreateOnSecondary():
5516 for child in device.children:
5517 _CreateBlockDev(lu, node, instance, child, force_create,
5520 if not force_create:
5523 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5526 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5527 """Create a single block device on a given node.
5529 This will not recurse over children of the device, so they must be created in advance.
5532 @param lu: the lu on whose behalf we execute
5533 @param node: the node on which to create the device
5534 @type instance: L{objects.Instance}
5535 @param instance: the instance which owns the device
5536 @type device: L{objects.Disk}
5537 @param device: the device to create
5538 @param info: the extra 'metadata' we should attach to the device
5539 (this will be represented as a LVM tag)
5540 @type force_open: boolean
5541 @param force_open: this parameter will be passed to the
5542 L{backend.BlockdevCreate} function where it specifies
5543 whether we run on primary or not, and it affects both
5544 the child assembly and the device's own Open() execution
5547 lu.cfg.SetDiskID(device, node)
5548 result = lu.rpc.call_blockdev_create(node, device, device.size,
5549 instance.name, force_open, info)
5550 result.Raise("Can't create block device %s on"
5551 " node %s for instance %s" % (device, node, instance.name))
5552 if device.physical_id is None:
5553 device.physical_id = result.payload
5556 def _GenerateUniqueNames(lu, exts):
5557 """Generate a suitable LV name.
5559 This will generate a logical volume name for the given instance.
5564 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5565 results.append("%s%s" % (new_id, val))
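# Result sketch (ids shortened and illustrative): for exts=[".disk0", ".disk1"]
# each extension gets its own freshly generated unique id, e.g.
#   ["f27c7701-....disk0", "d2a8e6c3-....disk1"]
# which the DRBD8 template below further suffixes with "_data" and "_meta".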
5569 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5571 """Generate a drbd8 device complete with its children.
5574 port = lu.cfg.AllocatePort()
5575 vgname = lu.cfg.GetVGName()
5576 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5577 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5578 logical_id=(vgname, names[0]))
5579 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5580 logical_id=(vgname, names[1]))
5581 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5582 logical_id=(primary, secondary, port,
5585 children=[dev_data, dev_meta],
5590 def _GenerateDiskTemplate(lu, template_name,
5591 instance_name, primary_node,
5592 secondary_nodes, disk_info,
5593 file_storage_dir, file_driver,
5595 """Generate the entire disk layout for a given template type.
5598 # TODO: compute space requirements
5600 vgname = lu.cfg.GetVGName()
5601 disk_count = len(disk_info)
5603 if template_name == constants.DT_DISKLESS:
5605 elif template_name == constants.DT_PLAIN:
5606 if len(secondary_nodes) != 0:
5607 raise errors.ProgrammerError("Wrong template configuration")
5609 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5610 for i in range(disk_count)])
5611 for idx, disk in enumerate(disk_info):
5612 disk_index = idx + base_index
5613 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5614 logical_id=(vgname, names[idx]),
5615 iv_name="disk/%d" % disk_index,
5617 disks.append(disk_dev)
5618 elif template_name == constants.DT_DRBD8:
5619 if len(secondary_nodes) != 1:
5620 raise errors.ProgrammerError("Wrong template configuration")
5621 remote_node = secondary_nodes[0]
5622 minors = lu.cfg.AllocateDRBDMinor(
5623 [primary_node, remote_node] * len(disk_info), instance_name)
5626 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5627 for i in range(disk_count)]):
5628 names.append(lv_prefix + "_data")
5629 names.append(lv_prefix + "_meta")
5630 for idx, disk in enumerate(disk_info):
5631 disk_index = idx + base_index
5632 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5633 disk["size"], names[idx*2:idx*2+2],
5634 "disk/%d" % disk_index,
5635 minors[idx*2], minors[idx*2+1])
5636 disk_dev.mode = disk["mode"]
5637 disks.append(disk_dev)
5638 elif template_name == constants.DT_FILE:
5639 if len(secondary_nodes) != 0:
5640 raise errors.ProgrammerError("Wrong template configuration")
5642 for idx, disk in enumerate(disk_info):
5643 disk_index = idx + base_index
5644 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5645 iv_name="disk/%d" % disk_index,
5646 logical_id=(file_driver,
5647 "%s/disk%d" % (file_storage_dir,
5650 disks.append(disk_dev)
5652 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
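# Input sketch (illustrative): disk_info is a list of dicts with at least
# "size" (in MiB) and "mode", e.g. [{"size": 1024, "mode": "rw"}]; base_index
# offsets the generated "disk/N" iv_names when disks are added to an
# existing instance.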
5656 def _GetInstanceInfoText(instance):
5657 """Compute that text that should be added to the disk's metadata.
5660 return "originstname+%s" % instance.name
5663 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5664 """Create all disks for an instance.
5666 This abstracts away some work from AddInstance.
5668 @type lu: L{LogicalUnit}
5669 @param lu: the logical unit on whose behalf we execute
5670 @type instance: L{objects.Instance}
5671 @param instance: the instance whose disks we should create
5673 @param to_skip: list of indices to skip
5674 @type target_node: string
5675 @param target_node: if passed, overrides the target node for creation
5677 @return: the success of the creation
5680 info = _GetInstanceInfoText(instance)
5681 if target_node is None:
5682 pnode = instance.primary_node
5683 all_nodes = instance.all_nodes
5688 if instance.disk_template == constants.DT_FILE:
5689 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5690 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5692 result.Raise("Failed to create directory '%s' on"
5693 " node %s" % (file_storage_dir, pnode))
5695 # Note: this needs to be kept in sync with adding of disks in
5696 # LUSetInstanceParams
5697 for idx, device in enumerate(instance.disks):
5698 if to_skip and idx in to_skip:
5700 logging.info("Creating volume %s for instance %s",
5701 device.iv_name, instance.name)
5703 for node in all_nodes:
5704 f_create = node == pnode
5705 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
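# Usage sketch (as in the move and recreate-disks paths above): creation can
# be redirected to another node or restricted to a subset of disks, e.g.
#   _CreateDisks(self, instance, target_node=target_node)
#   _CreateDisks(self, self.instance, to_skip=to_skip)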
5708 def _RemoveDisks(lu, instance, target_node=None):
5709 """Remove all disks for an instance.
5711 This abstracts away some work from `AddInstance()` and
5712 `RemoveInstance()`. Note that in case some of the devices couldn't
5713 be removed, the removal will continue with the other ones (compare
5714 with `_CreateDisks()`).
5716 @type lu: L{LogicalUnit}
5717 @param lu: the logical unit on whose behalf we execute
5718 @type instance: L{objects.Instance}
5719 @param instance: the instance whose disks we should remove
5720 @type target_node: string
5721 @param target_node: used to override the node on which to remove the disks
5723 @return: the success of the removal
5726 logging.info("Removing block devices for instance %s", instance.name)
5729 for device in instance.disks:
5731 edata = [(target_node, device)]
5733 edata = device.ComputeNodeTree(instance.primary_node)
5734 for node, disk in edata:
5735 lu.cfg.SetDiskID(disk, node)
5736 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5738 lu.LogWarning("Could not remove block device %s on node %s,"
5739 " continuing anyway: %s", device.iv_name, node, msg)
5742 if instance.disk_template == constants.DT_FILE:
5743 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5747 tgt = instance.primary_node
5748 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5750 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5751 file_storage_dir, instance.primary_node, result.fail_msg)
5757 def _ComputeDiskSize(disk_template, disks):
5758 """Compute disk size requirements in the volume group
5761 # Required free disk space as a function of disk and swap space
5763 constants.DT_DISKLESS: None,
5764 constants.DT_PLAIN: sum(d["size"] for d in disks),
5765 # 128 MB are added for drbd metadata for each disk
5766 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5767 constants.DT_FILE: None,
5770 if disk_template not in req_size_dict:
5771 raise errors.ProgrammerError("Disk template '%s' size requirement"
5772 " is unknown" % disk_template)
5774 return req_size_dict[disk_template]
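# Worked example: a DRBD8 instance with two disks of 1024 and 2048 MiB needs
# (1024 + 128) + (2048 + 128) = 3328 MiB of free VG space, while diskless and
# file-based instances need no VG space at all (None).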
5777 def _CheckHVParams(lu, nodenames, hvname, hvparams):
5778 """Hypervisor parameter validation.
5780 This function abstracts the hypervisor parameter validation so it can be
5781 used in both instance create and instance modify.
5783 @type lu: L{LogicalUnit}
5784 @param lu: the logical unit for which we check
5785 @type nodenames: list
5786 @param nodenames: the list of nodes on which we should check
5787 @type hvname: string
5788 @param hvname: the name of the hypervisor we should use
5789 @type hvparams: dict
5790 @param hvparams: the parameters which we need to check
5791 @raise errors.OpPrereqError: if the parameters are not valid
5794 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5797 for node in nodenames:
5801 info.Raise("Hypervisor parameter validation failed on node %s" % node)
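# A typical call, mirroring how LUCreateInstance.CheckPrereq uses this helper
# further below (names taken from that context):
#
#   _CheckHVParams(self, [pnode.name] + self.secondaries,
#                  self.op.hypervisor, self.op.hvparams)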
5804 class LUCreateInstance(LogicalUnit):
5805 """Create an instance.
5808 HPATH = "instance-add"
5809 HTYPE = constants.HTYPE_INSTANCE
5810 _OP_REQP = ["instance_name", "disks", "disk_template",
5812 "wait_for_sync", "ip_check", "nics",
5813 "hvparams", "beparams"]
5816 def CheckArguments(self):
5820 # set optional parameters to None if they don't exist
5821 for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5822 if not hasattr(self.op, attr):
5823 setattr(self.op, attr, None)
5825 # do not require name_check to ease forward/backward compatibility
5827 if not hasattr(self.op, "name_check"):
5828 self.op.name_check = True
5829 if not hasattr(self.op, "no_install"):
5830 self.op.no_install = False
5831 if self.op.no_install and self.op.start:
5832 self.LogInfo("No-installation mode selected, disabling startup")
5833 self.op.start = False
5834 # validate/normalize the instance name
5835 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
5836 if self.op.ip_check and not self.op.name_check:
5837 # TODO: make the ip check more flexible and not depend on the name check
5838 raise errors.OpPrereqError("Cannot do ip checks without a name check",
5840 if (self.op.disk_template == constants.DT_FILE and
5841 not constants.ENABLE_FILE_STORAGE):
5842 raise errors.OpPrereqError("File storage disabled at configure time",
5844 # check disk information: either all adopt, or no adopt
5845 has_adopt = has_no_adopt = False
5846 for disk in self.op.disks:
5851 if has_adopt and has_no_adopt:
5852 raise errors.OpPrereqError("Either all disks are adopted or none is",
5855 if self.op.disk_template != constants.DT_PLAIN:
5856 raise errors.OpPrereqError("Disk adoption is only supported for the"
5857 " 'plain' disk template",
5859 if self.op.iallocator is not None:
5860 raise errors.OpPrereqError("Disk adoption not allowed with an"
5861 " iallocator script", errors.ECODE_INVAL)
5862 if self.op.mode == constants.INSTANCE_IMPORT:
5863 raise errors.OpPrereqError("Disk adoption not allowed for"
5864 " instance import", errors.ECODE_INVAL)
5866 self.adopt_disks = has_adopt
5868 def ExpandNames(self):
5869 """ExpandNames for CreateInstance.
5871 Figure out the right locks for instance creation.
5874 self.needed_locks = {}
5876 # cheap checks, mostly valid constants given
5878 # verify creation mode
5879 if self.op.mode not in (constants.INSTANCE_CREATE,
5880 constants.INSTANCE_IMPORT):
5881 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5882 self.op.mode, errors.ECODE_INVAL)
5884 # disk template and mirror node verification
5885 _CheckDiskTemplate(self.op.disk_template)
5887 if self.op.hypervisor is None:
5888 self.op.hypervisor = self.cfg.GetHypervisorType()
5890 cluster = self.cfg.GetClusterInfo()
5891 enabled_hvs = cluster.enabled_hypervisors
5892 if self.op.hypervisor not in enabled_hvs:
5893 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5894 " cluster (%s)" % (self.op.hypervisor,
5895 ",".join(enabled_hvs)),
5898 # check hypervisor parameter syntax (locally)
5899 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5900 filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5902 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5903 hv_type.CheckParameterSyntax(filled_hvp)
5904 self.hv_full = filled_hvp
5905 # check that we don't specify global parameters on an instance
5906 _CheckGlobalHvParams(self.op.hvparams)
5908 # fill and remember the beparams dict
5909 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5910 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5913 #### instance parameters check
5915 # instance name verification
5916 if self.op.name_check:
5917 hostname1 = utils.GetHostInfo(self.op.instance_name)
5918 self.op.instance_name = instance_name = hostname1.name
5919 # used in CheckPrereq for ip ping check
5920 self.check_ip = hostname1.ip
5922 instance_name = self.op.instance_name
5923 self.check_ip = None
5925 # this is just a preventive check, but someone might still add this
5926 # instance in the meantime, and creation will fail at lock-add time
5927 if instance_name in self.cfg.GetInstanceList():
5928 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5929 instance_name, errors.ECODE_EXISTS)
5931 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5935 for idx, nic in enumerate(self.op.nics):
5936 nic_mode_req = nic.get("mode", None)
5937 nic_mode = nic_mode_req
5938 if nic_mode is None:
5939 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5941 # in routed mode, for the first nic, the default ip is 'auto'
5942 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5943 default_ip_mode = constants.VALUE_AUTO
5945 default_ip_mode = constants.VALUE_NONE
5947 # ip validity checks
5948 ip = nic.get("ip", default_ip_mode)
5949 if ip is None or ip.lower() == constants.VALUE_NONE:
5951 elif ip.lower() == constants.VALUE_AUTO:
5952 if not self.op.name_check:
5953 raise errors.OpPrereqError("IP address set to auto but name checks"
5954 " have been skipped. Aborting.",
5956 nic_ip = hostname1.ip
5958 if not utils.IsValidIP(ip):
5959 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5960 " like a valid IP" % ip,
5964 # TODO: check the ip address for uniqueness
5965 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5966 raise errors.OpPrereqError("Routed nic mode requires an ip address",
5969 # MAC address verification
5970 mac = nic.get("mac", constants.VALUE_AUTO)
5971 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5972 mac = utils.NormalizeAndValidateMac(mac)
5975 self.cfg.ReserveMAC(mac, self.proc.GetECId())
5976 except errors.ReservationError:
5977 raise errors.OpPrereqError("MAC address %s already in use"
5978 " in cluster" % mac,
5979 errors.ECODE_NOTUNIQUE)
5981 # bridge verification
5982 bridge = nic.get("bridge", None)
5983 link = nic.get("link", None)
5985 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5986 " at the same time", errors.ECODE_INVAL)
5987 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5988 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
5995 nicparams[constants.NIC_MODE] = nic_mode_req
5997 nicparams[constants.NIC_LINK] = link
5999 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6001 objects.NIC.CheckParameterSyntax(check_params)
6002 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6004 # disk checks/pre-build
6006 for disk in self.op.disks:
6007 mode = disk.get("mode", constants.DISK_RDWR)
6008 if mode not in constants.DISK_ACCESS_SET:
6009 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6010 mode, errors.ECODE_INVAL)
6011 size = disk.get("size", None)
6013 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6016 except (TypeError, ValueError):
6017 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6019 new_disk = {"size": size, "mode": mode}
6021 new_disk["adopt"] = disk["adopt"]
6022 self.disks.append(new_disk)
6024 # file storage checks
6025 if (self.op.file_driver and
6026 not self.op.file_driver in constants.FILE_DRIVER):
6027 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6028 self.op.file_driver, errors.ECODE_INVAL)
6030 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6031 raise errors.OpPrereqError("File storage directory path not absolute",
6034 ### Node/iallocator related checks
6035 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6036 raise errors.OpPrereqError("One and only one of iallocator and primary"
6037 " node must be given",
6040 if self.op.iallocator:
6041 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6043 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6044 nodelist = [self.op.pnode]
6045 if self.op.snode is not None:
6046 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6047 nodelist.append(self.op.snode)
6048 self.needed_locks[locking.LEVEL_NODE] = nodelist
6050 # in case of import lock the source node too
6051 if self.op.mode == constants.INSTANCE_IMPORT:
6052 src_node = getattr(self.op, "src_node", None)
6053 src_path = getattr(self.op, "src_path", None)
6055 if src_path is None:
6056 self.op.src_path = src_path = self.op.instance_name
6058 if src_node is None:
6059 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6060 self.op.src_node = None
6061 if os.path.isabs(src_path):
6062 raise errors.OpPrereqError("Importing an instance from an absolute"
6063 " path requires a source node option.",
6066 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6067 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6068 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6069 if not os.path.isabs(src_path):
6070 self.op.src_path = src_path = \
6071 utils.PathJoin(constants.EXPORT_DIR, src_path)
6073 # On import force_variant must be True, because if we forced it at
6074 # initial install, our only chance when importing it back is that it
6076 self.op.force_variant = True
6078 if self.op.no_install:
6079 self.LogInfo("No-installation mode has no effect during import")
6081 else: # INSTANCE_CREATE
6082 if getattr(self.op, "os_type", None) is None:
6083 raise errors.OpPrereqError("No guest OS specified",
6085 self.op.force_variant = getattr(self.op, "force_variant", False)
6087 def _RunAllocator(self):
6088 """Run the allocator based on input opcode.
6091 nics = [n.ToDict() for n in self.nics]
6092 ial = IAllocator(self.cfg, self.rpc,
6093 mode=constants.IALLOCATOR_MODE_ALLOC,
6094 name=self.op.instance_name,
6095 disk_template=self.op.disk_template,
6098 vcpus=self.be_full[constants.BE_VCPUS],
6099 mem_size=self.be_full[constants.BE_MEMORY],
6102 hypervisor=self.op.hypervisor,
6105 ial.Run(self.op.iallocator)
6108 raise errors.OpPrereqError("Can't compute nodes using"
6109 " iallocator '%s': %s" %
6110 (self.op.iallocator, ial.info),
6112 if len(ial.result) != ial.required_nodes:
6113 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6114 " of nodes (%s), required %s" %
6115 (self.op.iallocator, len(ial.result),
6116 ial.required_nodes), errors.ECODE_FAULT)
6117 self.op.pnode = ial.result[0]
6118 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6119 self.op.instance_name, self.op.iallocator,
6120 utils.CommaJoin(ial.result))
6121 if ial.required_nodes == 2:
6122 self.op.snode = ial.result[1]
6124 def BuildHooksEnv(self):
6127 This runs on master, primary and secondary nodes of the instance.
6131 "ADD_MODE": self.op.mode,
6133 if self.op.mode == constants.INSTANCE_IMPORT:
6134 env["SRC_NODE"] = self.op.src_node
6135 env["SRC_PATH"] = self.op.src_path
6136 env["SRC_IMAGES"] = self.src_images
6138 env.update(_BuildInstanceHookEnv(
6139 name=self.op.instance_name,
6140 primary_node=self.op.pnode,
6141 secondary_nodes=self.secondaries,
6142 status=self.op.start,
6143 os_type=self.op.os_type,
6144 memory=self.be_full[constants.BE_MEMORY],
6145 vcpus=self.be_full[constants.BE_VCPUS],
6146 nics=_NICListToTuple(self, self.nics),
6147 disk_template=self.op.disk_template,
6148 disks=[(d["size"], d["mode"]) for d in self.disks],
6151 hypervisor_name=self.op.hypervisor,
6154 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6159 def CheckPrereq(self):
6160 """Check prerequisites.
6163 if (not self.cfg.GetVGName() and
6164 self.op.disk_template not in constants.DTS_NOT_LVM):
6165 raise errors.OpPrereqError("Cluster does not support lvm-based"
6166 " instances", errors.ECODE_STATE)
6168 if self.op.mode == constants.INSTANCE_IMPORT:
6169 src_node = self.op.src_node
6170 src_path = self.op.src_path
6172 if src_node is None:
6173 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6174 exp_list = self.rpc.call_export_list(locked_nodes)
6176 for node in exp_list:
6177 if exp_list[node].fail_msg:
6179 if src_path in exp_list[node].payload:
6181 self.op.src_node = src_node = node
6182 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6186 raise errors.OpPrereqError("No export found for relative path %s" %
6187 src_path, errors.ECODE_INVAL)
6189 _CheckNodeOnline(self, src_node)
6190 result = self.rpc.call_export_info(src_node, src_path)
6191 result.Raise("No export or invalid export found in dir %s" % src_path)
6193 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6194 if not export_info.has_section(constants.INISECT_EXP):
6195 raise errors.ProgrammerError("Corrupted export config",
6196 errors.ECODE_ENVIRON)
6198 ei_version = export_info.get(constants.INISECT_EXP, 'version')
6199 if (int(ei_version) != constants.EXPORT_VERSION):
6200 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6201 (ei_version, constants.EXPORT_VERSION),
6202 errors.ECODE_ENVIRON)
6204 # Check that the new instance doesn't have fewer disks than the export
6205 instance_disks = len(self.disks)
6206 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6207 if instance_disks < export_disks:
6208 raise errors.OpPrereqError("Not enough disks to import."
6209 " (instance: %d, export: %d)" %
6210 (instance_disks, export_disks),
6213 self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
6215 for idx in range(export_disks):
6216 option = 'disk%d_dump' % idx
6217 if export_info.has_option(constants.INISECT_INS, option):
6218 # FIXME: are the old os-es, disk sizes, etc. useful?
6219 export_name = export_info.get(constants.INISECT_INS, option)
6220 image = utils.PathJoin(src_path, export_name)
6221 disk_images.append(image)
6223 disk_images.append(False)
6225 self.src_images = disk_images
6227 old_name = export_info.get(constants.INISECT_INS, 'name')
6228 # FIXME: int() here could throw a ValueError on broken exports
6229 exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
6230 if self.op.instance_name == old_name:
6231 for idx, nic in enumerate(self.nics):
6232 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6233 nic_mac_ini = 'nic%d_mac' % idx
6234 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6236 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6238 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6239 if self.op.ip_check:
6240 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6241 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6242 (self.check_ip, self.op.instance_name),
6243 errors.ECODE_NOTUNIQUE)
6245 #### mac address generation
6246 # By generating the MAC address here, both the allocator and the hooks get
6247 # the real, final MAC address rather than the 'auto' or 'generate' value.
6248 # There is a race condition between the generation and the instance object
6249 # creation, which means that we know the mac is valid now, but we're not
6250 # sure it will be when we actually add the instance. If things go bad
6251 # adding the instance will abort because of a duplicate mac, and the
6252 # creation job will fail.
6253 for nic in self.nics:
6254 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6255 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6259 if self.op.iallocator is not None:
6260 self._RunAllocator()
6262 #### node related checks
6264 # check primary node
6265 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6266 assert self.pnode is not None, \
6267 "Cannot retrieve locked node %s" % self.op.pnode
6269 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6270 pnode.name, errors.ECODE_STATE)
6272 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6273 pnode.name, errors.ECODE_STATE)
6275 self.secondaries = []
6277 # mirror node verification
6278 if self.op.disk_template in constants.DTS_NET_MIRROR:
6279 if self.op.snode is None:
6280 raise errors.OpPrereqError("The networked disk templates need"
6281 " a mirror node", errors.ECODE_INVAL)
6282 if self.op.snode == pnode.name:
6283 raise errors.OpPrereqError("The secondary node cannot be the"
6284 " primary node.", errors.ECODE_INVAL)
6285 _CheckNodeOnline(self, self.op.snode)
6286 _CheckNodeNotDrained(self, self.op.snode)
6287 self.secondaries.append(self.op.snode)
6289 nodenames = [pnode.name] + self.secondaries
6291 req_size = _ComputeDiskSize(self.op.disk_template,
6294 # Check lv size requirements, if not adopting
6295 if req_size is not None and not self.adopt_disks:
6296 _CheckNodesFreeDisk(self, nodenames, req_size)
6298 if self.adopt_disks: # instead, we must check the adoption data
6299 all_lvs = set([i["adopt"] for i in self.disks])
6300 if len(all_lvs) != len(self.disks):
6301 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6303 for lv_name in all_lvs:
6305 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6306 except errors.ReservationError:
6307 raise errors.OpPrereqError("LV named %s used by another instance" %
6308 lv_name, errors.ECODE_NOTUNIQUE)
6310 node_lvs = self.rpc.call_lv_list([pnode.name],
6311 self.cfg.GetVGName())[pnode.name]
6312 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6313 node_lvs = node_lvs.payload
6314 delta = all_lvs.difference(node_lvs.keys())
6316 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6317 utils.CommaJoin(delta),
6319 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6321 raise errors.OpPrereqError("Online logical volumes found, cannot"
6322 " adopt: %s" % utils.CommaJoin(online_lvs),
6324 # update the size of the disks based on what is found
6325 for dsk in self.disks:
6326 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
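# Note (assumption inferred from the indexing above): the lv_list payload maps
# each LV name to a tuple whose first element is its size in MB (copied into
# dsk["size"]) and whose third element is the in-use/online flag checked by
# the adoption safety test.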
6328 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6330 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6332 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6334 # memory check on primary node
6336 _CheckNodeFreeMemory(self, self.pnode.name,
6337 "creating instance %s" % self.op.instance_name,
6338 self.be_full[constants.BE_MEMORY],
6341 self.dry_run_result = list(nodenames)
6343 def Exec(self, feedback_fn):
6344 """Create and add the instance to the cluster.
6347 instance = self.op.instance_name
6348 pnode_name = self.pnode.name
6350 ht_kind = self.op.hypervisor
6351 if ht_kind in constants.HTS_REQ_PORT:
6352 network_port = self.cfg.AllocatePort()
6356 ##if self.op.vnc_bind_address is None:
6357 ## self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
6359 # this is needed because os.path.join does not accept None arguments
6360 if self.op.file_storage_dir is None:
6361 string_file_storage_dir = ""
6363 string_file_storage_dir = self.op.file_storage_dir
6365 # build the full file storage dir path
6366 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6367 string_file_storage_dir, instance)
6370 disks = _GenerateDiskTemplate(self,
6371 self.op.disk_template,
6372 instance, pnode_name,
6376 self.op.file_driver,
6379 iobj = objects.Instance(name=instance, os=self.op.os_type,
6380 primary_node=pnode_name,
6381 nics=self.nics, disks=disks,
6382 disk_template=self.op.disk_template,
6384 network_port=network_port,
6385 beparams=self.op.beparams,
6386 hvparams=self.op.hvparams,
6387 hypervisor=self.op.hypervisor,
6390 if self.adopt_disks:
6391 # rename LVs to the newly-generated names; we need to construct
6392 # 'fake' LV disks with the old data, plus the new unique_id
6393 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6395 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
6396 rename_to.append(t_dsk.logical_id)
6397 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6398 self.cfg.SetDiskID(t_dsk, pnode_name)
6399 result = self.rpc.call_blockdev_rename(pnode_name,
6400 zip(tmp_disks, rename_to))
6401 result.Raise("Failed to rename adopted LVs")
6403 feedback_fn("* creating instance disks...")
6405 _CreateDisks(self, iobj)
6406 except errors.OpExecError:
6407 self.LogWarning("Device creation failed, reverting...")
6409 _RemoveDisks(self, iobj)
6411 self.cfg.ReleaseDRBDMinors(instance)
6414 feedback_fn("adding instance %s to cluster config" % instance)
6416 self.cfg.AddInstance(iobj, self.proc.GetECId())
6418 # Declare that we don't want to remove the instance lock anymore, as we've
6419 # added the instance to the config
6420 del self.remove_locks[locking.LEVEL_INSTANCE]
6421 # Unlock all the nodes
6422 if self.op.mode == constants.INSTANCE_IMPORT:
6423 nodes_keep = [self.op.src_node]
6424 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6425 if node != self.op.src_node]
6426 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6427 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6429 self.context.glm.release(locking.LEVEL_NODE)
6430 del self.acquired_locks[locking.LEVEL_NODE]
6432 if self.op.wait_for_sync:
6433 disk_abort = not _WaitForSync(self, iobj)
6434 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6435 # make sure the disks are not degraded (still sync-ing is ok)
6437 feedback_fn("* checking mirrors status")
6438 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6443 _RemoveDisks(self, iobj)
6444 self.cfg.RemoveInstance(iobj.name)
6445 # Make sure the instance lock gets removed
6446 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6447 raise errors.OpExecError("There are some degraded disks for"
6450 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6451 if self.op.mode == constants.INSTANCE_CREATE:
6452 if not self.op.no_install:
6453 feedback_fn("* running the instance OS create scripts...")
6454 # FIXME: pass debug option from opcode to backend
6455 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6456 self.op.debug_level)
6457 result.Raise("Could not add os for instance %s"
6458 " on node %s" % (instance, pnode_name))
6460 elif self.op.mode == constants.INSTANCE_IMPORT:
6461 feedback_fn("* running the instance OS import scripts...")
6462 src_node = self.op.src_node
6463 src_images = self.src_images
6464 cluster_name = self.cfg.GetClusterName()
6465 # FIXME: pass debug option from opcode to backend
6466 import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6467 src_node, src_images,
6469 self.op.debug_level)
6470 msg = import_result.fail_msg
6472 self.LogWarning("Error while importing the disk images for instance"
6473 " %s on node %s: %s" % (instance, pnode_name, msg))
6475 # also checked in the prereq part
6476 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6480 iobj.admin_up = True
6481 self.cfg.Update(iobj, feedback_fn)
6482 logging.info("Starting instance %s on node %s", instance, pnode_name)
6483 feedback_fn("* starting instance...")
6484 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6485 result.Raise("Could not start instance")
6487 return list(iobj.all_nodes)
6490 class LUConnectConsole(NoHooksLU):
6491 """Connect to an instance's console.
6493 This is somewhat special in that it returns the command line that
6494 you need to run on the master node in order to connect to the
6498 _OP_REQP = ["instance_name"]
6501 def ExpandNames(self):
6502 self._ExpandAndLockInstance()
6504 def CheckPrereq(self):
6505 """Check prerequisites.
6507 This checks that the instance is in the cluster.
6510 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6511 assert self.instance is not None, \
6512 "Cannot retrieve locked instance %s" % self.op.instance_name
6513 _CheckNodeOnline(self, self.instance.primary_node)
6515 def Exec(self, feedback_fn):
6516 """Connect to the console of an instance
6519 instance = self.instance
6520 node = instance.primary_node
6522 node_insts = self.rpc.call_instance_list([node],
6523 [instance.hypervisor])[node]
6524 node_insts.Raise("Can't get node information from %s" % node)
6526 if instance.name not in node_insts.payload:
6527 raise errors.OpExecError("Instance %s is not running." % instance.name)
6529 logging.debug("Connecting to console of %s on %s", instance.name, node)
6531 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6532 cluster = self.cfg.GetClusterInfo()
6533 # beparams and hvparams are passed separately, to avoid editing the
6534 # instance and then saving the defaults in the instance itself.
6535 hvparams = cluster.FillHV(instance)
6536 beparams = cluster.FillBE(instance)
6537 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6540 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
6543 class LUReplaceDisks(LogicalUnit):
6544 """Replace the disks of an instance.
6547 HPATH = "mirrors-replace"
6548 HTYPE = constants.HTYPE_INSTANCE
6549 _OP_REQP = ["instance_name", "mode", "disks"]
6552 def CheckArguments(self):
6553 if not hasattr(self.op, "remote_node"):
6554 self.op.remote_node = None
6555 if not hasattr(self.op, "iallocator"):
6556 self.op.iallocator = None
6557 if not hasattr(self.op, "early_release"):
6558 self.op.early_release = False
6560 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6563 def ExpandNames(self):
6564 self._ExpandAndLockInstance()
6566 if self.op.iallocator is not None:
6567 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6569 elif self.op.remote_node is not None:
6570 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6571 self.op.remote_node = remote_node
6573 # Warning: do not remove the locking of the new secondary here
6574 # unless DRBD8.AddChildren is changed to work in parallel;
6575 # currently it doesn't since parallel invocations of
6576 # FindUnusedMinor will conflict
6577 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6578 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6581 self.needed_locks[locking.LEVEL_NODE] = []
6582 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6584 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6585 self.op.iallocator, self.op.remote_node,
6586 self.op.disks, False, self.op.early_release)
6588 self.tasklets = [self.replacer]
6590 def DeclareLocks(self, level):
6591 # If we're not already locking all nodes in the set we have to declare the
6592 # instance's primary/secondary nodes.
6593 if (level == locking.LEVEL_NODE and
6594 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6595 self._LockInstancesNodes()
6597 def BuildHooksEnv(self):
6600 This runs on the master, the primary and all the secondaries.
6603 instance = self.replacer.instance
6605 "MODE": self.op.mode,
6606 "NEW_SECONDARY": self.op.remote_node,
6607 "OLD_SECONDARY": instance.secondary_nodes[0],
6609 env.update(_BuildInstanceHookEnvByObject(self, instance))
6611 self.cfg.GetMasterNode(),
6612 instance.primary_node,
6614 if self.op.remote_node is not None:
6615 nl.append(self.op.remote_node)
6619 class LUEvacuateNode(LogicalUnit):
6620 """Relocate the secondary instances from a node.
6623 HPATH = "node-evacuate"
6624 HTYPE = constants.HTYPE_NODE
6625 _OP_REQP = ["node_name"]
6628 def CheckArguments(self):
6629 if not hasattr(self.op, "remote_node"):
6630 self.op.remote_node = None
6631 if not hasattr(self.op, "iallocator"):
6632 self.op.iallocator = None
6633 if not hasattr(self.op, "early_release"):
6634 self.op.early_release = False
6636 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6637 self.op.remote_node,
6640 def ExpandNames(self):
6641 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6643 self.needed_locks = {}
6645 # Declare node locks
6646 if self.op.iallocator is not None:
6647 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6649 elif self.op.remote_node is not None:
6650 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6652 # Warning: do not remove the locking of the new secondary here
6653 # unless DRBD8.AddChildren is changed to work in parallel;
6654 # currently it doesn't since parallel invocations of
6655 # FindUnusedMinor will conflict
6656 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
6657 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6660 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
6662 # Create tasklets for replacing disks for all secondary instances on this
6667 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6668 logging.debug("Replacing disks for instance %s", inst.name)
6669 names.append(inst.name)
6671 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6672 self.op.iallocator, self.op.remote_node, [],
6673 True, self.op.early_release)
6674 tasklets.append(replacer)
6676 self.tasklets = tasklets
6677 self.instance_names = names
6679 # Declare instance locks
6680 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
6682 def DeclareLocks(self, level):
6683 # If we're not already locking all nodes in the set we have to declare the
6684 # instance's primary/secondary nodes.
6685 if (level == locking.LEVEL_NODE and
6686 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6687 self._LockInstancesNodes()
6689 def BuildHooksEnv(self):
6692 This runs on the master, the primary and all the secondaries.
6696 "NODE_NAME": self.op.node_name,
6699 nl = [self.cfg.GetMasterNode()]
6701 if self.op.remote_node is not None:
6702 env["NEW_SECONDARY"] = self.op.remote_node
6703 nl.append(self.op.remote_node)
6705 return (env, nl, nl)
6708 class TLReplaceDisks(Tasklet):
6709 """Replaces disks for an instance.
6711 Note: Locking is not within the scope of this class.
6714 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6715 disks, delay_iallocator, early_release):
6716 """Initializes this class.
6719 Tasklet.__init__(self, lu)
6722 self.instance_name = instance_name
6724 self.iallocator_name = iallocator_name
6725 self.remote_node = remote_node
6727 self.delay_iallocator = delay_iallocator
6728 self.early_release = early_release
6731 self.instance = None
6732 self.new_node = None
6733 self.target_node = None
6734 self.other_node = None
6735 self.remote_node_info = None
6736 self.node_secondary_ip = None
6739 def CheckArguments(mode, remote_node, iallocator):
6740 """Helper function for users of this class.
6743 # check for valid parameter combination
6744 if mode == constants.REPLACE_DISK_CHG:
6745 if remote_node is None and iallocator is None:
6746 raise errors.OpPrereqError("When changing the secondary either an"
6747 " iallocator script must be used or the"
6748 " new node given", errors.ECODE_INVAL)
6750 if remote_node is not None and iallocator is not None:
6751 raise errors.OpPrereqError("Give either the iallocator or the new"
6752 " secondary, not both", errors.ECODE_INVAL)
6754 elif remote_node is not None or iallocator is not None:
6755 # Not replacing the secondary
6756 raise errors.OpPrereqError("The iallocator and new node options can"
6757 " only be used when changing the"
6758 " secondary node", errors.ECODE_INVAL)
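# In short, the combinations accepted above are:
#   - REPLACE_DISK_PRI / REPLACE_DISK_SEC / REPLACE_DISK_AUTO: neither a new
#     secondary node nor an iallocator may be given;
#   - REPLACE_DISK_CHG: exactly one of the two must be given, e.g.
#     (hypothetical node name):
#       TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
#                                     "node2.example.com", None)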
6761 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6762 """Compute a new secondary node using an IAllocator.
6765 ial = IAllocator(lu.cfg, lu.rpc,
6766 mode=constants.IALLOCATOR_MODE_RELOC,
6768 relocate_from=relocate_from)
6770 ial.Run(iallocator_name)
6773 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6774 " %s" % (iallocator_name, ial.info),
6777 if len(ial.result) != ial.required_nodes:
6778 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6779 " of nodes (%s), required %s" %
6781 len(ial.result), ial.required_nodes),
6784 remote_node_name = ial.result[0]
6786 lu.LogInfo("Selected new secondary for instance '%s': %s",
6787 instance_name, remote_node_name)
6789 return remote_node_name
6791 def _FindFaultyDisks(self, node_name):
6792 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
6795 def CheckPrereq(self):
6796 """Check prerequisites.
6798 This checks that the instance is in the cluster.
6801 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
6802 assert instance is not None, \
6803 "Cannot retrieve locked instance %s" % self.instance_name
6805 if instance.disk_template != constants.DT_DRBD8:
6806 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
6807 " instances", errors.ECODE_INVAL)
6809 if len(instance.secondary_nodes) != 1:
6810 raise errors.OpPrereqError("The instance has a strange layout,"
6811 " expected one secondary but found %d" %
6812 len(instance.secondary_nodes),
6815 if not self.delay_iallocator:
6816 self._CheckPrereq2()
6818 def _CheckPrereq2(self):
6819 """Check prerequisites, second part.
6821 This function should always be part of CheckPrereq. It was separated and is
6822 now called from Exec because during node evacuation iallocator was only
6823 called with an unmodified cluster model, not taking planned changes into
6827 instance = self.instance
6828 secondary_node = instance.secondary_nodes[0]
6830 if self.iallocator_name is None:
6831 remote_node = self.remote_node
6833 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
6834 instance.name, instance.secondary_nodes)
6836 if remote_node is not None:
6837 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
6838 assert self.remote_node_info is not None, \
6839 "Cannot retrieve locked node %s" % remote_node
6841 self.remote_node_info = None
6843 if remote_node == self.instance.primary_node:
6844 raise errors.OpPrereqError("The specified node is the primary node of"
6845 " the instance.", errors.ECODE_INVAL)
6847 if remote_node == secondary_node:
6848 raise errors.OpPrereqError("The specified node is already the"
6849 " secondary node of the instance.",
6852 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
6853 constants.REPLACE_DISK_CHG):
6854 raise errors.OpPrereqError("Cannot specify disks to be replaced",
6857 if self.mode == constants.REPLACE_DISK_AUTO:
6858 faulty_primary = self._FindFaultyDisks(instance.primary_node)
6859 faulty_secondary = self._FindFaultyDisks(secondary_node)
6861 if faulty_primary and faulty_secondary:
6862 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
6863 " one node and can not be repaired"
6864 " automatically" % self.instance_name,
6868 self.disks = faulty_primary
6869 self.target_node = instance.primary_node
6870 self.other_node = secondary_node
6871 check_nodes = [self.target_node, self.other_node]
6872 elif faulty_secondary:
6873 self.disks = faulty_secondary
6874 self.target_node = secondary_node
6875 self.other_node = instance.primary_node
6876 check_nodes = [self.target_node, self.other_node]
6882 # Non-automatic modes
6883 if self.mode == constants.REPLACE_DISK_PRI:
6884 self.target_node = instance.primary_node
6885 self.other_node = secondary_node
6886 check_nodes = [self.target_node, self.other_node]
6888 elif self.mode == constants.REPLACE_DISK_SEC:
6889 self.target_node = secondary_node
6890 self.other_node = instance.primary_node
6891 check_nodes = [self.target_node, self.other_node]
6893 elif self.mode == constants.REPLACE_DISK_CHG:
6894 self.new_node = remote_node
6895 self.other_node = instance.primary_node
6896 self.target_node = secondary_node
6897 check_nodes = [self.new_node, self.other_node]
6899 _CheckNodeNotDrained(self.lu, remote_node)
6901 old_node_info = self.cfg.GetNodeInfo(secondary_node)
6902 assert old_node_info is not None
6903 if old_node_info.offline and not self.early_release:
6904 # doesn't make sense to delay the release
6905 self.early_release = True
6906 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
6907 " early-release mode", secondary_node)
6910 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
6913 # If not specified all disks should be replaced
6915 self.disks = range(len(self.instance.disks))
6917 for node in check_nodes:
6918 _CheckNodeOnline(self.lu, node)
6920 # Check whether disks are valid
6921 for disk_idx in self.disks:
6922 instance.FindDisk(disk_idx)
6924 # Get secondary node IP addresses
6927 for node_name in [self.target_node, self.other_node, self.new_node]:
6928 if node_name is not None:
6929 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
6931 self.node_secondary_ip = node_2nd_ip
6933 def Exec(self, feedback_fn):
6934 """Execute disk replacement.
6936 This dispatches the disk replacement to the appropriate handler.
6939 if self.delay_iallocator:
6940 self._CheckPrereq2()
6943 feedback_fn("No disks need replacement")
6946 feedback_fn("Replacing disk(s) %s for %s" %
6947 (utils.CommaJoin(self.disks), self.instance.name))
6949 activate_disks = (not self.instance.admin_up)
6951 # Activate the instance disks if we're replacing them on a down instance
6953 _StartInstanceDisks(self.lu, self.instance, True)
6956 # Should we replace the secondary node?
6957 if self.new_node is not None:
6958 fn = self._ExecDrbd8Secondary
6960 fn = self._ExecDrbd8DiskOnly
6962 return fn(feedback_fn)
6965 # Deactivate the instance disks if we're replacing them on a
6968 _SafeShutdownInstanceDisks(self.lu, self.instance)
6970 def _CheckVolumeGroup(self, nodes):
6971 self.lu.LogInfo("Checking volume groups")
6973 vgname = self.cfg.GetVGName()
6975 # Make sure volume group exists on all involved nodes
6976 results = self.rpc.call_vg_list(nodes)
6978 raise errors.OpExecError("Can't list volume groups on the nodes")
6982 res.Raise("Error checking node %s" % node)
6983 if vgname not in res.payload:
6984 raise errors.OpExecError("Volume group '%s' not found on node %s" %
6987 def _CheckDisksExistence(self, nodes):
6988 # Check disk existence
6989 for idx, dev in enumerate(self.instance.disks):
6990 if idx not in self.disks:
6994 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
6995 self.cfg.SetDiskID(dev, node)
6997 result = self.rpc.call_blockdev_find(node, dev)
6999 msg = result.fail_msg
7000 if msg or not result.payload:
7002 msg = "disk not found"
7003 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7006 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7007 for idx, dev in enumerate(self.instance.disks):
7008 if idx not in self.disks:
7011 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7014 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7016 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7017 " replace disks for instance %s" %
7018 (node_name, self.instance.name))
7020 def _CreateNewStorage(self, node_name):
7021 vgname = self.cfg.GetVGName()
7024 for idx, dev in enumerate(self.instance.disks):
7025 if idx not in self.disks:
7028 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7030 self.cfg.SetDiskID(dev, node_name)
7032 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7033 names = _GenerateUniqueNames(self.lu, lv_names)
7035 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7036 logical_id=(vgname, names[0]))
7037 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7038 logical_id=(vgname, names[1]))
7040 new_lvs = [lv_data, lv_meta]
7041 old_lvs = dev.children
7042 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7044 # we pass force_create=True to force the LVM creation
7045 for new_lv in new_lvs:
7046 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7047 _GetInstanceInfoText(self.instance), False)
7051 def _CheckDevices(self, node_name, iv_names):
7052 for name, (dev, _, _) in iv_names.iteritems():
7053 self.cfg.SetDiskID(dev, node_name)
7055 result = self.rpc.call_blockdev_find(node_name, dev)
7057 msg = result.fail_msg
7058 if msg or not result.payload:
7060 msg = "disk not found"
7061 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7064 if result.payload.is_degraded:
7065 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7067 def _RemoveOldStorage(self, node_name, iv_names):
7068 for name, (_, old_lvs, _) in iv_names.iteritems():
7069 self.lu.LogInfo("Remove logical volumes for %s" % name)
7072 self.cfg.SetDiskID(lv, node_name)
7074 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7076 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7077 hint="remove unused LVs manually")
7079 def _ReleaseNodeLock(self, node_name):
7080 """Releases the lock for a given node."""
7081 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7083 def _ExecDrbd8DiskOnly(self, feedback_fn):
7084 """Replace a disk on the primary or secondary for DRBD 8.
7086 The algorithm for replace is quite complicated:
7088 1. for each disk to be replaced:
7090 1. create new LVs on the target node with unique names
7091 1. detach old LVs from the drbd device
7092 1. rename old LVs to name_replaced.<time_t>
7093 1. rename new LVs to old LVs
7094 1. attach the new LVs (with the old names now) to the drbd device
7096 1. wait for sync across all devices
7098 1. for each modified disk:
7100 1. remove old LVs (which have the name name_replaced.<time_t>)
7102 Failures are not very well handled.
7107 # Step: check device activation
7108 self.lu.LogStep(1, steps_total, "Check device existence")
7109 self._CheckDisksExistence([self.other_node, self.target_node])
7110 self._CheckVolumeGroup([self.target_node, self.other_node])
7112 # Step: check other node consistency
7113 self.lu.LogStep(2, steps_total, "Check peer consistency")
7114 self._CheckDisksConsistency(self.other_node,
7115 self.other_node == self.instance.primary_node,
7118 # Step: create new storage
7119 self.lu.LogStep(3, steps_total, "Allocate new storage")
7120 iv_names = self._CreateNewStorage(self.target_node)
7122 # Step: for each lv, detach+rename*2+attach
7123 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7124 for dev, old_lvs, new_lvs in iv_names.itervalues():
7125 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7127 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7129 result.Raise("Can't detach drbd from local storage on node"
7130 " %s for device %s" % (self.target_node, dev.iv_name))
7132 #cfg.Update(instance)
7134 # ok, we created the new LVs, so now we know we have the needed
7135 # storage; as such, we proceed on the target node to rename
7136 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7137 # using the assumption that logical_id == physical_id (which in
7138 # turn is the unique_id on that node)
7140 # FIXME(iustin): use a better name for the replaced LVs
7141 temp_suffix = int(time.time())
7142 ren_fn = lambda d, suff: (d.physical_id[0],
7143 d.physical_id[1] + "_replaced-%s" % suff)
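# For example (hypothetical names): an old LV with physical_id
# ("xenvg", "disk0_data") is renamed by ren_fn to
# ("xenvg", "disk0_data_replaced-<time_t>"), freeing its original name for the
# freshly created replacement LV.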
7145 # Build the rename list based on what LVs exist on the node
7146 rename_old_to_new = []
7147 for to_ren in old_lvs:
7148 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7149 if not result.fail_msg and result.payload:
7151 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7153 self.lu.LogInfo("Renaming the old LVs on the target node")
7154 result = self.rpc.call_blockdev_rename(self.target_node,
7156 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7158 # Now we rename the new LVs to the old LVs
7159 self.lu.LogInfo("Renaming the new LVs on the target node")
7160 rename_new_to_old = [(new, old.physical_id)
7161 for old, new in zip(old_lvs, new_lvs)]
7162 result = self.rpc.call_blockdev_rename(self.target_node,
7164 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7166 for old, new in zip(old_lvs, new_lvs):
7167 new.logical_id = old.logical_id
7168 self.cfg.SetDiskID(new, self.target_node)
7170 for disk in old_lvs:
7171 disk.logical_id = ren_fn(disk, temp_suffix)
7172 self.cfg.SetDiskID(disk, self.target_node)
7174 # Now that the new lvs have the old name, we can add them to the device
7175 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7176 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7178 msg = result.fail_msg
7180 for new_lv in new_lvs:
7181 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7184 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7185 hint=("cleanup manually the unused logical"
7187 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7189 dev.children = new_lvs
7191 self.cfg.Update(self.instance, feedback_fn)
7194 if self.early_release:
7195 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7197 self._RemoveOldStorage(self.target_node, iv_names)
7198 # WARNING: we release both node locks here, do not do other RPCs
7199 # than WaitForSync to the primary node
7200 self._ReleaseNodeLock([self.target_node, self.other_node])
7203 # This can fail as the old devices are degraded and _WaitForSync
7204 # does a combined result over all disks, so we don't check its return value
7205 self.lu.LogStep(cstep, steps_total, "Sync devices")
7207 _WaitForSync(self.lu, self.instance)
7209 # Check all devices manually
7210 self._CheckDevices(self.instance.primary_node, iv_names)
7212 # Step: remove old storage
7213 if not self.early_release:
7214 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7216 self._RemoveOldStorage(self.target_node, iv_names)
7218 def _ExecDrbd8Secondary(self, feedback_fn):
7219 """Replace the secondary node for DRBD 8.
7221 The algorithm for replace is quite complicated:
7222 - for all disks of the instance:
7223 - create new LVs on the new node with same names
7224 - shutdown the drbd device on the old secondary
7225 - disconnect the drbd network on the primary
7226 - create the drbd device on the new secondary
7227 - network attach the drbd on the primary, using an artifice:
7228 the drbd code for Attach() will connect to the network if it
7229 finds a device which is connected to the good local disks but
7231 - wait for sync across all devices
7232 - remove all disks from the old secondary
7234 Failures are not very well handled.
7239 # Step: check device activation
7240 self.lu.LogStep(1, steps_total, "Check device existence")
7241 self._CheckDisksExistence([self.instance.primary_node])
7242 self._CheckVolumeGroup([self.instance.primary_node])
7244 # Step: check other node consistency
7245 self.lu.LogStep(2, steps_total, "Check peer consistency")
7246 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7248 # Step: create new storage
7249 self.lu.LogStep(3, steps_total, "Allocate new storage")
7250 for idx, dev in enumerate(self.instance.disks):
7251 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7252 (self.new_node, idx))
7253 # we pass force_create=True to force LVM creation
7254 for new_lv in dev.children:
7255 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7256 _GetInstanceInfoText(self.instance), False)
7258 # Step 4: drbd minors and drbd setup changes
7259 # after this, we must manually remove the drbd minors on both the
7260 # error and the success paths
7261 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7262 minors = self.cfg.AllocateDRBDMinor([self.new_node
7263 for dev in self.instance.disks],
7265 logging.debug("Allocated minors %r", minors)
7268 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7269 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7270 (self.new_node, idx))
7271 # create new devices on new_node; note that we create two IDs:
7272 # one without port, so the drbd will be activated without
7273 # networking information on the new node at this stage, and one
7274 # with network, for the later activation in step 4
7275 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7276 if self.instance.primary_node == o_node1:
7279 assert self.instance.primary_node == o_node2, "Three-node instance?"
7282 new_alone_id = (self.instance.primary_node, self.new_node, None,
7283 p_minor, new_minor, o_secret)
7284 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7285 p_minor, new_minor, o_secret)
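# Example (hypothetical values): with primary minor 3 and new minor 7,
# new_alone_id is (primary, new_node, None, 3, 7, secret) -- no port, so the
# new drbd comes up standalone -- while new_net_id is
# (primary, new_node, port, 3, 7, secret), carrying the network information
# used when the primary is re-attached later on.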
7287 iv_names[idx] = (dev, dev.children, new_net_id)
7288 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7290 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7291 logical_id=new_alone_id,
7292 children=dev.children,
7295 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7296 _GetInstanceInfoText(self.instance), False)
7297 except errors.GenericError:
7298 self.cfg.ReleaseDRBDMinors(self.instance.name)
7301 # We have new devices, shutdown the drbd on the old secondary
7302 for idx, dev in enumerate(self.instance.disks):
7303 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7304 self.cfg.SetDiskID(dev, self.target_node)
7305 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7307 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7308 " node: %s" % (idx, msg),
7309 hint=("Please cleanup this device manually as"
7310 " soon as possible"))
7312 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7313 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7314 self.node_secondary_ip,
7315 self.instance.disks)\
7316 [self.instance.primary_node]
7318 msg = result.fail_msg
7320 # detaches didn't succeed (unlikely)
7321 self.cfg.ReleaseDRBDMinors(self.instance.name)
7322 raise errors.OpExecError("Can't detach the disks from the network on"
7323 " old node: %s" % (msg,))
7325 # if we managed to detach at least one, we update all the disks of
7326 # the instance to point to the new secondary
7327 self.lu.LogInfo("Updating instance configuration")
7328 for dev, _, new_logical_id in iv_names.itervalues():
7329 dev.logical_id = new_logical_id
7330 self.cfg.SetDiskID(dev, self.instance.primary_node)
7332 self.cfg.Update(self.instance, feedback_fn)
7334 # and now perform the drbd attach
7335 self.lu.LogInfo("Attaching primary drbds to new secondary"
7336 " (standalone => connected)")
7337 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7339 self.node_secondary_ip,
7340 self.instance.disks,
7343 for to_node, to_result in result.items():
7344 msg = to_result.fail_msg
7346 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7348 hint=("please do a gnt-instance info to see the"
7349 " status of disks"))
7351 if self.early_release:
7352 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7354 self._RemoveOldStorage(self.target_node, iv_names)
7355 # WARNING: we release all node locks here, do not do other RPCs
7356 # than WaitForSync to the primary node
7357 self._ReleaseNodeLock([self.instance.primary_node,
7362 # This can fail as the old devices are degraded and _WaitForSync
7363 # does a combined result over all disks, so we don't check its return value
7364 self.lu.LogStep(cstep, steps_total, "Sync devices")
7366 _WaitForSync(self.lu, self.instance)
7368 # Check all devices manually
7369 self._CheckDevices(self.instance.primary_node, iv_names)
7371 # Step: remove old storage
7372 if not self.early_release:
7373 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7374 self._RemoveOldStorage(self.target_node, iv_names)
7377 class LURepairNodeStorage(NoHooksLU):
7378 """Repairs the volume group on a node.
7381 _OP_REQP = ["node_name"]
7384 def CheckArguments(self):
7385 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7387 def ExpandNames(self):
7388 self.needed_locks = {
7389 locking.LEVEL_NODE: [self.op.node_name],
7392 def _CheckFaultyDisks(self, instance, node_name):
7393 """Ensure faulty disks abort the opcode or at least warn."""
7395 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7397 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7398 " node '%s'" % (instance.name, node_name),
7400 except errors.OpPrereqError, err:
7401 if self.op.ignore_consistency:
7402 self.proc.LogWarning(str(err.args[0]))
7406 def CheckPrereq(self):
7407 """Check prerequisites.
7410 storage_type = self.op.storage_type
7412 if (constants.SO_FIX_CONSISTENCY not in
7413 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7414 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7415 " repaired" % storage_type,
7418 # Check whether any instance on this node has faulty disks
7419 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7420 if not inst.admin_up:
7422 check_nodes = set(inst.all_nodes)
7423 check_nodes.discard(self.op.node_name)
7424 for inst_node_name in check_nodes:
7425 self._CheckFaultyDisks(inst, inst_node_name)
7427 def Exec(self, feedback_fn):
7428 feedback_fn("Repairing storage unit '%s' on %s ..." %
7429 (self.op.name, self.op.node_name))
7431 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7432 result = self.rpc.call_storage_execute(self.op.node_name,
7433 self.op.storage_type, st_args,
7435 constants.SO_FIX_CONSISTENCY)
7436 result.Raise("Failed to repair storage unit '%s' on %s" %
7437 (self.op.name, self.op.node_name))
7440 class LUNodeEvacuationStrategy(NoHooksLU):
7441 """Computes the node evacuation strategy.
7444 _OP_REQP = ["nodes"]
7447 def CheckArguments(self):
7448 if not hasattr(self.op, "remote_node"):
7449 self.op.remote_node = None
7450 if not hasattr(self.op, "iallocator"):
7451 self.op.iallocator = None
7452 if self.op.remote_node is not None and self.op.iallocator is not None:
7453 raise errors.OpPrereqError("Give either the iallocator or the new"
7454 " secondary, not both", errors.ECODE_INVAL)
7456 def ExpandNames(self):
7457 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7458 self.needed_locks = locks = {}
7459 if self.op.remote_node is None:
7460 locks[locking.LEVEL_NODE] = locking.ALL_SET
7462 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7463 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7465 def CheckPrereq(self):
7468 def Exec(self, feedback_fn):
7469 if self.op.remote_node is not None:
7471 for node in self.op.nodes:
7472 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7475 if i.primary_node == self.op.remote_node:
7476 raise errors.OpPrereqError("Node %s is the primary node of"
7477 " instance %s, cannot use it as"
7479 (self.op.remote_node, i.name),
7481 result.append([i.name, self.op.remote_node])
7483 ial = IAllocator(self.cfg, self.rpc,
7484 mode=constants.IALLOCATOR_MODE_MEVAC,
7485 evac_nodes=self.op.nodes)
7486 ial.Run(self.op.iallocator, validate=True)
7488 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7494 class LUGrowDisk(LogicalUnit):
7495 """Grow a disk of an instance.
7499 HTYPE = constants.HTYPE_INSTANCE
7500 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7503 def ExpandNames(self):
7504 self._ExpandAndLockInstance()
7505 self.needed_locks[locking.LEVEL_NODE] = []
7506 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7508 def DeclareLocks(self, level):
7509 if level == locking.LEVEL_NODE:
7510 self._LockInstancesNodes()
7512 def BuildHooksEnv(self):
7515 This runs on the master, the primary and all the secondaries.
7519 "DISK": self.op.disk,
7520 "AMOUNT": self.op.amount,
7522 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7523 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7526 def CheckPrereq(self):
7527 """Check prerequisites.
7529 This checks that the instance is in the cluster.
7532 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7533 assert instance is not None, \
7534 "Cannot retrieve locked instance %s" % self.op.instance_name
7535 nodenames = list(instance.all_nodes)
7536 for node in nodenames:
7537 _CheckNodeOnline(self, node)
7540 self.instance = instance
7542 if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
7543 raise errors.OpPrereqError("Instance's disk layout does not support"
7544 " growing.", errors.ECODE_INVAL)
7546 self.disk = instance.FindDisk(self.op.disk)
7548 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7550 def Exec(self, feedback_fn):
7551 """Execute disk grow.
7554 instance = self.instance
7556 for node in instance.all_nodes:
7557 self.cfg.SetDiskID(disk, node)
7558 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7559 result.Raise("Grow request failed to node %s" % node)
7561 # TODO: Rewrite code to work properly
7562 # DRBD goes into sync mode for a short amount of time after executing the
7563 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7564 # calling "resize" in sync mode fails. Sleeping for a short amount of
7565 # time is a work-around.
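# once every node has accepted the grow, record the new size in the
# configuration; the (optional) wait for resync happens below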
7568 disk.RecordGrow(self.op.amount)
7569 self.cfg.Update(instance, feedback_fn)
7570 if self.op.wait_for_sync:
7571 disk_abort = not _WaitForSync(self, instance)
7573 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7574 " status.\nPlease check the instance.")
7577 class LUQueryInstanceData(NoHooksLU):
7578 """Query runtime instance data.
7581 _OP_REQP = ["instances", "static"]
7584 def ExpandNames(self):
7585 self.needed_locks = {}
7586 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7588 if not isinstance(self.op.instances, list):
7589 raise errors.OpPrereqError("Invalid argument type 'instances'",
7592 if self.op.instances:
7593 self.wanted_names = []
7594 for name in self.op.instances:
7595 full_name = _ExpandInstanceName(self.cfg, name)
7596 self.wanted_names.append(full_name)
7597 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7599 self.wanted_names = None
7600 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7602 self.needed_locks[locking.LEVEL_NODE] = []
7603 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7605 def DeclareLocks(self, level):
7606 if level == locking.LEVEL_NODE:
7607 self._LockInstancesNodes()
7609 def CheckPrereq(self):
7610 """Check prerequisites.
7612 This only checks the optional instance list against the existing names.
7615 if self.wanted_names is None:
7616 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7618 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7619 in self.wanted_names]
7622 def _ComputeBlockdevStatus(self, node, instance_name, dev):
7623 """Returns the status of a block device
7626 if self.op.static or not node:
7629 self.cfg.SetDiskID(dev, node)
7631 result = self.rpc.call_blockdev_find(node, dev)
7635 result.Raise("Can't compute disk status for %s" % instance_name)
7637 status = result.payload
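# the payload describes the device as found on the node; flatten it into
# the tuple format consumed by _ComputeDiskStatus below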
7641 return (status.dev_path, status.major, status.minor,
7642 status.sync_percent, status.estimated_time,
7643 status.is_degraded, status.ldisk_status)
7645 def _ComputeDiskStatus(self, instance, snode, dev):
7646 """Compute block device status.
7649 if dev.dev_type in constants.LDS_DRBD:
7650 # we change the snode then (otherwise we use the one passed in)
7651 if dev.logical_id[0] == instance.primary_node:
7652 snode = dev.logical_id[1]
7654 snode = dev.logical_id[0]
7656 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7658 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7661 dev_children = [self._ComputeDiskStatus(instance, snode, child)
7662 for child in dev.children]
7667 "iv_name": dev.iv_name,
7668 "dev_type": dev.dev_type,
7669 "logical_id": dev.logical_id,
7670 "physical_id": dev.physical_id,
7671 "pstatus": dev_pstatus,
7672 "sstatus": dev_sstatus,
7673 "children": dev_children,
7680 def Exec(self, feedback_fn):
7681 """Gather and return data"""
7684 cluster = self.cfg.GetClusterInfo()
7686 for instance in self.wanted_instances:
7687 if not self.op.static:
7688 remote_info = self.rpc.call_instance_info(instance.primary_node,
7690 instance.hypervisor)
7691 remote_info.Raise("Error checking node %s" % instance.primary_node)
7692 remote_info = remote_info.payload
7693 if remote_info and "state" in remote_info:
7696 remote_state = "down"
7699 if instance.admin_up:
7702 config_state = "down"
7704 disks = [self._ComputeDiskStatus(instance, None, device)
7705 for device in instance.disks]
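# build one dictionary per instance; these are collected into the LU
# result keyed by instance name (see result[instance.name] below)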
7708 "name": instance.name,
7709 "config_state": config_state,
7710 "run_state": remote_state,
7711 "pnode": instance.primary_node,
7712 "snodes": instance.secondary_nodes,
7714 # this happens to be the same format used for hooks
7715 "nics": _NICListToTuple(self, instance.nics),
7717 "hypervisor": instance.hypervisor,
7718 "network_port": instance.network_port,
7719 "hv_instance": instance.hvparams,
7720 "hv_actual": cluster.FillHV(instance, skip_globals=True),
7721 "be_instance": instance.beparams,
7722 "be_actual": cluster.FillBE(instance),
7723 "serial_no": instance.serial_no,
7724 "mtime": instance.mtime,
7725 "ctime": instance.ctime,
7726 "uuid": instance.uuid,
7729 result[instance.name] = idict
7734 class LUSetInstanceParams(LogicalUnit):
7735 """Modifies an instances's parameters.
7738 HPATH = "instance-modify"
7739 HTYPE = constants.HTYPE_INSTANCE
7740 _OP_REQP = ["instance_name"]
7743 def CheckArguments(self):
7744 if not hasattr(self.op, 'nics'):
7746 if not hasattr(self.op, 'disks'):
7748 if not hasattr(self.op, 'beparams'):
7749 self.op.beparams = {}
7750 if not hasattr(self.op, 'hvparams'):
7751 self.op.hvparams = {}
7752 if not hasattr(self.op, "disk_template"):
7753 self.op.disk_template = None
7754 if not hasattr(self.op, "remote_node"):
7755 self.op.remote_node = None
7756 if not hasattr(self.op, "os_name"):
7757 self.op.os_name = None
7758 if not hasattr(self.op, "force_variant"):
7759 self.op.force_variant = False
7760 self.op.force = getattr(self.op, "force", False)
7761 if not (self.op.nics or self.op.disks or self.op.disk_template or
7762 self.op.hvparams or self.op.beparams or self.op.os_name):
7763 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
7765 if self.op.hvparams:
7766 _CheckGlobalHvParams(self.op.hvparams)
7770 for disk_op, disk_dict in self.op.disks:
7771 if disk_op == constants.DDM_REMOVE:
7774 elif disk_op == constants.DDM_ADD:
7777 if not isinstance(disk_op, int):
7778 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
7779 if not isinstance(disk_dict, dict):
7780 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7781 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7783 if disk_op == constants.DDM_ADD:
7784 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7785 if mode not in constants.DISK_ACCESS_SET:
7786 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
7788 size = disk_dict.get('size', None)
7790 raise errors.OpPrereqError("Required disk parameter size missing",
7794 except (TypeError, ValueError), err:
7795 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7796 str(err), errors.ECODE_INVAL)
7797 disk_dict['size'] = size
7799 # modification of disk
7800 if 'size' in disk_dict:
7801 raise errors.OpPrereqError("Disk size change not possible, use"
7802 " grow-disk", errors.ECODE_INVAL)
7804 if disk_addremove > 1:
7805 raise errors.OpPrereqError("Only one disk add or remove operation"
7806 " supported at a time", errors.ECODE_INVAL)
7808 if self.op.disks and self.op.disk_template is not None:
7809 raise errors.OpPrereqError("Disk template conversion and other disk"
7810 " changes not supported at the same time",
7813 if self.op.disk_template:
7814 _CheckDiskTemplate(self.op.disk_template)
7815 if (self.op.disk_template in constants.DTS_NET_MIRROR and
7816 self.op.remote_node is None):
7817 raise errors.OpPrereqError("Changing the disk template to a mirrored"
7818 " one requires specifying a secondary node",
7823 for nic_op, nic_dict in self.op.nics:
7824 if nic_op == constants.DDM_REMOVE:
7827 elif nic_op == constants.DDM_ADD:
7830 if not isinstance(nic_op, int):
7831 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
7832 if not isinstance(nic_dict, dict):
7833 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7834 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7836 # nic_dict should be a dict
7837 nic_ip = nic_dict.get('ip', None)
7838 if nic_ip is not None:
7839 if nic_ip.lower() == constants.VALUE_NONE:
7840 nic_dict['ip'] = None
7842 if not utils.IsValidIP(nic_ip):
7843 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
7846 nic_bridge = nic_dict.get('bridge', None)
7847 nic_link = nic_dict.get('link', None)
7848 if nic_bridge and nic_link:
7849 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7850 " at the same time", errors.ECODE_INVAL)
7851 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7852 nic_dict['bridge'] = None
7853 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7854 nic_dict['link'] = None
7856 if nic_op == constants.DDM_ADD:
7857 nic_mac = nic_dict.get('mac', None)
7859 nic_dict['mac'] = constants.VALUE_AUTO
7861 if 'mac' in nic_dict:
7862 nic_mac = nic_dict['mac']
7863 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7864 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
7866 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7867 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7868 " modifying an existing nic",
7871 if nic_addremove > 1:
7872 raise errors.OpPrereqError("Only one NIC add or remove operation"
7873 " supported at a time", errors.ECODE_INVAL)
7875 def ExpandNames(self):
7876 self._ExpandAndLockInstance()
7877 self.needed_locks[locking.LEVEL_NODE] = []
7878 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7880 def DeclareLocks(self, level):
7881 if level == locking.LEVEL_NODE:
7882 self._LockInstancesNodes()
7883 if self.op.disk_template and self.op.remote_node:
7884 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7885 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
7887 def BuildHooksEnv(self):
7890 This runs on the master, primary and secondaries.
7894 if constants.BE_MEMORY in self.be_new:
7895 args['memory'] = self.be_new[constants.BE_MEMORY]
7896 if constants.BE_VCPUS in self.be_new:
7897 args['vcpus'] = self.be_new[constants.BE_VCPUS]
7898 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7899 # information at all.
7902 nic_override = dict(self.op.nics)
7903 c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7904 for idx, nic in enumerate(self.instance.nics):
7905 if idx in nic_override:
7906 this_nic_override = nic_override[idx]
7908 this_nic_override = {}
7909 if 'ip' in this_nic_override:
7910 ip = this_nic_override['ip']
7913 if 'mac' in this_nic_override:
7914 mac = this_nic_override['mac']
7917 if idx in self.nic_pnew:
7918 nicparams = self.nic_pnew[idx]
7920 nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7921 mode = nicparams[constants.NIC_MODE]
7922 link = nicparams[constants.NIC_LINK]
7923 args['nics'].append((ip, mac, mode, link))
7924 if constants.DDM_ADD in nic_override:
7925 ip = nic_override[constants.DDM_ADD].get('ip', None)
7926 mac = nic_override[constants.DDM_ADD]['mac']
7927 nicparams = self.nic_pnew[constants.DDM_ADD]
7928 mode = nicparams[constants.NIC_MODE]
7929 link = nicparams[constants.NIC_LINK]
7930 args['nics'].append((ip, mac, mode, link))
7931 elif constants.DDM_REMOVE in nic_override:
7932 del args['nics'][-1]
7934 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7935 if self.op.disk_template:
7936 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
7937 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7941 def _GetUpdatedParams(old_params, update_dict,
7942 default_values, parameter_types):
7943 """Return the new params dict for the given params.
7945 @type old_params: dict
7946 @param old_params: old parameters
7947 @type update_dict: dict
7948 @param update_dict: dict containing new parameter values,
7949 or constants.VALUE_DEFAULT to reset the
7950 parameter to its default value
7951 @type default_values: dict
7952 @param default_values: default values for the filled parameters
7953 @type parameter_types: dict
7954 @param parameter_types: dict mapping target dict keys to types
7955 in constants.ENFORCEABLE_TYPES
7956 @rtype: (dict, dict)
7957 @return: (new_parameters, filled_parameters)
7960 params_copy = copy.deepcopy(old_params)
7961 for key, val in update_dict.iteritems():
7962 if val == constants.VALUE_DEFAULT:
7964 del params_copy[key]
7968 params_copy[key] = val
7969 utils.ForceDictType(params_copy, parameter_types)
7970 params_filled = objects.FillDict(default_values, params_copy)
7971 return (params_copy, params_filled)
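# for example, with old_params={'memory': 512} and
# update_dict={'memory': constants.VALUE_DEFAULT}, the key is dropped from
# the returned private dict and the filled dict falls back to
# default_values['memory']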
7973 def CheckPrereq(self):
7974 """Check prerequisites.
7976 This only checks the instance list against the existing names.
7979 self.force = self.op.force
7981 # checking the new params on the primary/secondary nodes
7983 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7984 cluster = self.cluster = self.cfg.GetClusterInfo()
7985 assert self.instance is not None, \
7986 "Cannot retrieve locked instance %s" % self.op.instance_name
7987 pnode = instance.primary_node
7988 nodelist = list(instance.all_nodes)
7990 if self.op.disk_template:
7991 if instance.disk_template == self.op.disk_template:
7992 raise errors.OpPrereqError("Instance already has disk template %s" %
7993 instance.disk_template, errors.ECODE_INVAL)
7995 if (instance.disk_template,
7996 self.op.disk_template) not in self._DISK_CONVERSIONS:
7997 raise errors.OpPrereqError("Unsupported disk template conversion from"
7998 " %s to %s" % (instance.disk_template,
7999 self.op.disk_template),
8001 if self.op.disk_template in constants.DTS_NET_MIRROR:
8002 _CheckNodeOnline(self, self.op.remote_node)
8003 _CheckNodeNotDrained(self, self.op.remote_node)
8004 disks = [{"size": d.size} for d in instance.disks]
8005 required = _ComputeDiskSize(self.op.disk_template, disks)
8006 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8007 _CheckInstanceDown(self, instance, "cannot change disk template")
8009 # hvparams processing
8010 if self.op.hvparams:
8011 i_hvdict, hv_new = self._GetUpdatedParams(
8012 instance.hvparams, self.op.hvparams,
8013 cluster.hvparams[instance.hypervisor],
8014 constants.HVS_PARAMETER_TYPES)
8016 hypervisor.GetHypervisor(
8017 instance.hypervisor).CheckParameterSyntax(hv_new)
8018 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8019 self.hv_new = hv_new # the new actual values
8020 self.hv_inst = i_hvdict # the new dict (without defaults)
8022 self.hv_new = self.hv_inst = {}
8024 # beparams processing
8025 if self.op.beparams:
8026 i_bedict, be_new = self._GetUpdatedParams(
8027 instance.beparams, self.op.beparams,
8028 cluster.beparams[constants.PP_DEFAULT],
8029 constants.BES_PARAMETER_TYPES)
8030 self.be_new = be_new # the new actual values
8031 self.be_inst = i_bedict # the new dict (without defaults)
8033 self.be_new = self.be_inst = {}
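# if the memory size is being changed and force was not requested, verify
# that the primary node (and, when auto_balance is set, the secondaries
# too) still has enough free memory for the new value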
8037 if constants.BE_MEMORY in self.op.beparams and not self.force:
8038 mem_check_list = [pnode]
8039 if be_new[constants.BE_AUTO_BALANCE]:
8040 # either we changed auto_balance to yes or it was from before
8041 mem_check_list.extend(instance.secondary_nodes)
8042 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8043 instance.hypervisor)
8044 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8045 instance.hypervisor)
8046 pninfo = nodeinfo[pnode]
8047 msg = pninfo.fail_msg
8049 # Assume the primary node is unreachable and go ahead
8050 self.warn.append("Can't get info from primary node %s: %s" %
8052 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8053 self.warn.append("Node data from primary node %s doesn't contain"
8054 " free memory information" % pnode)
8055 elif instance_info.fail_msg:
8056 self.warn.append("Can't get instance runtime information: %s" %
8057 instance_info.fail_msg)
8059 if instance_info.payload:
8060 current_mem = int(instance_info.payload['memory'])
8062 # Assume instance not running
8063 # (there is a slight race condition here, but it's not very probable,
8064 # and we have no other way to check)
8066 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8067 pninfo.payload['memory_free'])
8069 raise errors.OpPrereqError("This change will prevent the instance"
8070 " from starting, due to %d MB of memory"
8071 " missing on its primary node" % miss_mem,
8074 if be_new[constants.BE_AUTO_BALANCE]:
8075 for node, nres in nodeinfo.items():
8076 if node not in instance.secondary_nodes:
8080 self.warn.append("Can't get info from secondary node %s: %s" %
8082 elif not isinstance(nres.payload.get('memory_free', None), int):
8083 self.warn.append("Secondary node %s didn't return free"
8084 " memory information" % node)
8085 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8086 self.warn.append("Not enough memory to failover instance to"
8087 " secondary node %s" % node)
8092 for nic_op, nic_dict in self.op.nics:
8093 if nic_op == constants.DDM_REMOVE:
8094 if not instance.nics:
8095 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8098 if nic_op != constants.DDM_ADD:
8100 if not instance.nics:
8101 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8102 " no NICs" % nic_op,
8104 if nic_op < 0 or nic_op >= len(instance.nics):
8105 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8107 (nic_op, len(instance.nics) - 1),
8109 old_nic_params = instance.nics[nic_op].nicparams
8110 old_nic_ip = instance.nics[nic_op].ip
8115 update_params_dict = dict([(key, nic_dict[key])
8116 for key in constants.NICS_PARAMETERS
8117 if key in nic_dict])
8119 if 'bridge' in nic_dict:
8120 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
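# 'bridge' is only accepted for backwards compatibility; it is folded
# into the 'link' nic parameter above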
8122 new_nic_params, new_filled_nic_params = \
8123 self._GetUpdatedParams(old_nic_params, update_params_dict,
8124 cluster.nicparams[constants.PP_DEFAULT],
8125 constants.NICS_PARAMETER_TYPES)
8126 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8127 self.nic_pinst[nic_op] = new_nic_params
8128 self.nic_pnew[nic_op] = new_filled_nic_params
8129 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8131 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8132 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8133 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8135 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8137 self.warn.append(msg)
8139 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8140 if new_nic_mode == constants.NIC_MODE_ROUTED:
8141 if 'ip' in nic_dict:
8142 nic_ip = nic_dict['ip']
8146 raise errors.OpPrereqError('Cannot set the nic ip to None'
8147 ' on a routed nic', errors.ECODE_INVAL)
8148 if 'mac' in nic_dict:
8149 nic_mac = nic_dict['mac']
8151 raise errors.OpPrereqError('Cannot set the nic mac to None',
8153 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8154 # otherwise generate the mac
8155 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8157 # or validate/reserve the current one
8159 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8160 except errors.ReservationError:
8161 raise errors.OpPrereqError("MAC address %s already in use"
8162 " in cluster" % nic_mac,
8163 errors.ECODE_NOTUNIQUE)
8166 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8167 raise errors.OpPrereqError("Disk operations not supported for"
8168 " diskless instances",
8170 for disk_op, _ in self.op.disks:
8171 if disk_op == constants.DDM_REMOVE:
8172 if len(instance.disks) == 1:
8173 raise errors.OpPrereqError("Cannot remove the last disk of"
8174 " an instance", errors.ECODE_INVAL)
8175 _CheckInstanceDown(self, instance, "cannot remove disks")
8177 if (disk_op == constants.DDM_ADD and
8178 len(instance.disks) >= constants.MAX_DISKS):
8179 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8180 " add more" % constants.MAX_DISKS,
8182 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8184 if disk_op < 0 or disk_op >= len(instance.disks):
8185 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8187 (disk_op, len(instance.disks)),
8191 if self.op.os_name and not self.op.force:
8192 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8193 self.op.force_variant)
8197 def _ConvertPlainToDrbd(self, feedback_fn):
8198 """Converts an instance from plain to drbd.
8201 feedback_fn("Converting template to drbd")
8202 instance = self.instance
8203 pnode = instance.primary_node
8204 snode = self.op.remote_node
8206 # create a fake disk info for _GenerateDiskTemplate
8207 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8208 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8209 instance.name, pnode, [snode],
8210 disk_info, None, None, 0)
8211 info = _GetInstanceInfoText(instance)
8212 feedback_fn("Creating additional volumes...")
8213 # first, create the missing data and meta devices
8214 for disk in new_disks:
8215 # unfortunately this is... not too nice
8216 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8218 for child in disk.children:
8219 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8220 # at this stage, all new LVs have been created, we can rename the
8222 feedback_fn("Renaming original volumes...")
8223 rename_list = [(o, n.children[0].logical_id)
8224 for (o, n) in zip(instance.disks, new_disks)]
8225 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8226 result.Raise("Failed to rename original LVs")
8228 feedback_fn("Initializing DRBD devices...")
8229 # all child devices are in place, we can now create the DRBD devices
8230 for disk in new_disks:
8231 for node in [pnode, snode]:
8232 f_create = node == pnode
8233 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8235 # at this point, the instance has been modified
8236 instance.disk_template = constants.DT_DRBD8
8237 instance.disks = new_disks
8238 self.cfg.Update(instance, feedback_fn)
8240 # disks are created, waiting for sync
8241 disk_abort = not _WaitForSync(self, instance)
8243 raise errors.OpExecError("There are some degraded disks for"
8244 " this instance, please cleanup manually")
8246 def _ConvertDrbdToPlain(self, feedback_fn):
8247 """Converts an instance from drbd to plain.
8250 instance = self.instance
8251 assert len(instance.secondary_nodes) == 1
8252 pnode = instance.primary_node
8253 snode = instance.secondary_nodes[0]
8254 feedback_fn("Converting template to plain")
8256 old_disks = instance.disks
8257 new_disks = [d.children[0] for d in old_disks]
8259 # copy over size and mode
8260 for parent, child in zip(old_disks, new_disks):
8261 child.size = parent.size
8262 child.mode = parent.mode
8264 # update instance structure
8265 instance.disks = new_disks
8266 instance.disk_template = constants.DT_PLAIN
8267 self.cfg.Update(instance, feedback_fn)
8269 feedback_fn("Removing volumes on the secondary node...")
8270 for disk in old_disks:
8271 self.cfg.SetDiskID(disk, snode)
8272 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8274 self.LogWarning("Could not remove block device %s on node %s,"
8275 " continuing anyway: %s", disk.iv_name, snode, msg)
8277 feedback_fn("Removing unneeded volumes on the primary node...")
8278 for idx, disk in enumerate(old_disks):
8279 meta = disk.children[1]
8280 self.cfg.SetDiskID(meta, pnode)
8281 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8283 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8284 " continuing anyway: %s", idx, pnode, msg)
8287 def Exec(self, feedback_fn):
8288 """Modifies an instance.
8290 All parameters take effect only at the next restart of the instance.
8293 # Process here the warnings from CheckPrereq, as we don't have a
8294 # feedback_fn there.
8295 for warn in self.warn:
8296 feedback_fn("WARNING: %s" % warn)
8299 instance = self.instance
8301 for disk_op, disk_dict in self.op.disks:
8302 if disk_op == constants.DDM_REMOVE:
8303 # remove the last disk
8304 device = instance.disks.pop()
8305 device_idx = len(instance.disks)
8306 for node, disk in device.ComputeNodeTree(instance.primary_node):
8307 self.cfg.SetDiskID(disk, node)
8308 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8310 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8311 " continuing anyway", device_idx, node, msg)
8312 result.append(("disk/%d" % device_idx, "remove"))
8313 elif disk_op == constants.DDM_ADD:
8315 if instance.disk_template == constants.DT_FILE:
8316 file_driver, file_path = instance.disks[0].logical_id
8317 file_path = os.path.dirname(file_path)
8319 file_driver = file_path = None
8320 disk_idx_base = len(instance.disks)
8321 new_disk = _GenerateDiskTemplate(self,
8322 instance.disk_template,
8323 instance.name, instance.primary_node,
8324 instance.secondary_nodes,
8329 instance.disks.append(new_disk)
8330 info = _GetInstanceInfoText(instance)
8332 logging.info("Creating volume %s for instance %s",
8333 new_disk.iv_name, instance.name)
8334 # Note: this needs to be kept in sync with _CreateDisks
8336 for node in instance.all_nodes:
8337 f_create = node == instance.primary_node
8339 _CreateBlockDev(self, node, instance, new_disk,
8340 f_create, info, f_create)
8341 except errors.OpExecError, err:
8342 self.LogWarning("Failed to create volume %s (%s) on"
8344 new_disk.iv_name, new_disk, node, err)
8345 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8346 (new_disk.size, new_disk.mode)))
8348 # change a given disk
8349 instance.disks[disk_op].mode = disk_dict['mode']
8350 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8352 if self.op.disk_template:
8353 r_shut = _ShutdownInstanceDisks(self, instance)
8355 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
8356 " proceed with disk template conversion")
8357 mode = (instance.disk_template, self.op.disk_template)
8359 self._DISK_CONVERSIONS[mode](self, feedback_fn)
8361 self.cfg.ReleaseDRBDMinors(instance.name)
8363 result.append(("disk_template", self.op.disk_template))
8366 for nic_op, nic_dict in self.op.nics:
8367 if nic_op == constants.DDM_REMOVE:
8368 # remove the last nic
8369 del instance.nics[-1]
8370 result.append(("nic.%d" % len(instance.nics), "remove"))
8371 elif nic_op == constants.DDM_ADD:
8372 # mac and bridge should be set by now
8373 mac = nic_dict['mac']
8374 ip = nic_dict.get('ip', None)
8375 nicparams = self.nic_pinst[constants.DDM_ADD]
8376 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8377 instance.nics.append(new_nic)
8378 result.append(("nic.%d" % (len(instance.nics) - 1),
8379 "add:mac=%s,ip=%s,mode=%s,link=%s" %
8380 (new_nic.mac, new_nic.ip,
8381 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8382 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8385 for key in 'mac', 'ip':
8387 setattr(instance.nics[nic_op], key, nic_dict[key])
8388 if nic_op in self.nic_pinst:
8389 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8390 for key, val in nic_dict.iteritems():
8391 result.append(("nic.%s/%d" % (key, nic_op), val))
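# hypervisor, backend and OS parameter changes below only touch the
# configuration; as documented in this method's docstring they take
# effect at the next restart of the instance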
8394 if self.op.hvparams:
8395 instance.hvparams = self.hv_inst
8396 for key, val in self.op.hvparams.iteritems():
8397 result.append(("hv/%s" % key, val))
8400 if self.op.beparams:
8401 instance.beparams = self.be_inst
8402 for key, val in self.op.beparams.iteritems():
8403 result.append(("be/%s" % key, val))
8407 instance.os = self.op.os_name
8409 self.cfg.Update(instance, feedback_fn)
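# the modified instance object has now been written back to the
# configuration; the accumulated (parameter, change) pairs are what this
# LU reports to the caller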
8413 _DISK_CONVERSIONS = {
8414 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8415 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8418 class LUQueryExports(NoHooksLU):
8419 """Query the exports list
8422 _OP_REQP = ['nodes']
8425 def ExpandNames(self):
8426 self.needed_locks = {}
8427 self.share_locks[locking.LEVEL_NODE] = 1
8428 if not self.op.nodes:
8429 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8431 self.needed_locks[locking.LEVEL_NODE] = \
8432 _GetWantedNodes(self, self.op.nodes)
8434 def CheckPrereq(self):
8435 """Check prerequisites.
8438 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8440 def Exec(self, feedback_fn):
8441 """Compute the list of all the exported system images.
8444 @return: a dictionary with the structure node->(export-list)
8445 where export-list is a list of the instances exported on
8449 rpcresult = self.rpc.call_export_list(self.nodes)
8451 for node in rpcresult:
8452 if rpcresult[node].fail_msg:
8453 result[node] = False
8455 result[node] = rpcresult[node].payload
8460 class LUExportInstance(LogicalUnit):
8461 """Export an instance to an image in the cluster.
8464 HPATH = "instance-export"
8465 HTYPE = constants.HTYPE_INSTANCE
8466 _OP_REQP = ["instance_name", "target_node", "shutdown"]
8469 def CheckArguments(self):
8470 """Check the arguments.
8473 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8474 constants.DEFAULT_SHUTDOWN_TIMEOUT)
8476 def ExpandNames(self):
8477 self._ExpandAndLockInstance()
8478 # FIXME: lock only instance primary and destination node
8480 # Sad but true; for now we have to lock all nodes, as we don't know where
8481 # the previous export might be, and in this LU we search for it and
8482 # remove it from its current node. In the future we could fix this by:
8483 # - making a tasklet to search (share-lock all), then create the new one,
8484 # then one to remove, after
8485 # - removing the removal operation altogether
8486 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8488 def DeclareLocks(self, level):
8489 """Last minute lock declaration."""
8490 # All nodes are locked anyway, so nothing to do here.
8492 def BuildHooksEnv(self):
8495 This will run on the master, primary node and target node.
8499 "EXPORT_NODE": self.op.target_node,
8500 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8501 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8503 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8504 nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8505 self.op.target_node]
8508 def CheckPrereq(self):
8509 """Check prerequisites.
8511 This checks that the instance and node names are valid.
8514 instance_name = self.op.instance_name
8515 self.instance = self.cfg.GetInstanceInfo(instance_name)
8516 assert self.instance is not None, \
8517 "Cannot retrieve locked instance %s" % self.op.instance_name
8518 _CheckNodeOnline(self, self.instance.primary_node)
8520 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8521 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8522 assert self.dst_node is not None
8524 _CheckNodeOnline(self, self.dst_node.name)
8525 _CheckNodeNotDrained(self, self.dst_node.name)
8527 # instance disk type verification
8528 for disk in self.instance.disks:
8529 if disk.dev_type == constants.LD_FILE:
8530 raise errors.OpPrereqError("Export not supported for instances with"
8531 " file-based disks", errors.ECODE_INVAL)
8533 def Exec(self, feedback_fn):
8534 """Export an instance to an image in the cluster.
8537 instance = self.instance
8538 dst_node = self.dst_node
8539 src_node = instance.primary_node
8541 if self.op.shutdown:
8542 # shutdown the instance, but not the disks
8543 feedback_fn("Shutting down instance %s" % instance.name)
8544 result = self.rpc.call_instance_shutdown(src_node, instance,
8545 self.shutdown_timeout)
8546 result.Raise("Could not shutdown instance %s on"
8547 " node %s" % (instance.name, src_node))
8549 vgname = self.cfg.GetVGName()
8553 # set the disks ID correctly since call_instance_start needs the
8554 # correct drbd minor to create the symlinks
8555 for disk in instance.disks:
8556 self.cfg.SetDiskID(disk, src_node)
8558 activate_disks = (not instance.admin_up)
8561 # Activate the instance disks if we're exporting a stopped instance
8562 feedback_fn("Activating disks for %s" % instance.name)
8563 _StartInstanceDisks(self, instance, None)
8569 for idx, disk in enumerate(instance.disks):
8570 feedback_fn("Creating a snapshot of disk/%s on node %s" %
8573 # result.payload will be a snapshot of an lvm leaf of the one we
8575 result = self.rpc.call_blockdev_snapshot(src_node, disk)
8576 msg = result.fail_msg
8578 self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8580 snap_disks.append(False)
8582 disk_id = (vgname, result.payload)
8583 new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8584 logical_id=disk_id, physical_id=disk_id,
8585 iv_name=disk.iv_name)
8586 snap_disks.append(new_dev)
8589 if self.op.shutdown and instance.admin_up:
8590 feedback_fn("Starting instance %s" % instance.name)
8591 result = self.rpc.call_instance_start(src_node, instance, None, None)
8592 msg = result.fail_msg
8594 _ShutdownInstanceDisks(self, instance)
8595 raise errors.OpExecError("Could not start instance: %s" % msg)
8597 # TODO: check for size
8599 cluster_name = self.cfg.GetClusterName()
8600 for idx, dev in enumerate(snap_disks):
8601 feedback_fn("Exporting snapshot %s from %s to %s" %
8602 (idx, src_node, dst_node.name))
8604 # FIXME: pass debug from opcode to backend
8605 result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
8606 instance, cluster_name,
8607 idx, self.op.debug_level)
8608 msg = result.fail_msg
8610 self.LogWarning("Could not export disk/%s from node %s to"
8611 " node %s: %s", idx, src_node, dst_node.name, msg)
8612 dresults.append(False)
8614 dresults.append(True)
8615 msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
8617 self.LogWarning("Could not remove snapshot for disk/%d from node"
8618 " %s: %s", idx, src_node, msg)
8620 dresults.append(False)
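# dresults holds one boolean per disk: True if the snapshot was created
# and exported successfully, False otherwise; it is returned together
# with the finalize status below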
8622 feedback_fn("Finalizing export on %s" % dst_node.name)
8623 result = self.rpc.call_finalize_export(dst_node.name, instance,
8626 msg = result.fail_msg
8628 self.LogWarning("Could not finalize export for instance %s"
8629 " on node %s: %s", instance.name, dst_node.name, msg)
8634 feedback_fn("Deactivating disks for %s" % instance.name)
8635 _ShutdownInstanceDisks(self, instance)
8637 nodelist = self.cfg.GetNodeList()
8638 nodelist.remove(dst_node.name)
8640 # on one-node clusters nodelist will be empty after the removal
8641 # if we proceed the backup would be removed because OpQueryExports
8642 # substitutes an empty list with the full cluster node list.
8643 iname = instance.name
8645 feedback_fn("Removing old exports for instance %s" % iname)
8646 exportlist = self.rpc.call_export_list(nodelist)
8647 for node in exportlist:
8648 if exportlist[node].fail_msg:
8650 if iname in exportlist[node].payload:
8651 msg = self.rpc.call_export_remove(node, iname).fail_msg
8653 self.LogWarning("Could not remove older export for instance %s"
8654 " on node %s: %s", iname, node, msg)
8655 return fin_resu, dresults
8658 class LURemoveExport(NoHooksLU):
8659 """Remove exports related to the named instance.
8662 _OP_REQP = ["instance_name"]
8665 def ExpandNames(self):
8666 self.needed_locks = {}
8667 # We need all nodes to be locked in order for RemoveExport to work, but we
8668 # don't need to lock the instance itself, as nothing will happen to it (and
8669 # we can remove exports also for a removed instance)
8670 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8672 def CheckPrereq(self):
8673 """Check prerequisites.
8677 def Exec(self, feedback_fn):
8678 """Remove any export.
8681 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
8682 # If the instance was not found we'll try with the name that was passed in.
8683 # This will only work if it was an FQDN, though.
8685 if not instance_name:
8687 instance_name = self.op.instance_name
8689 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
8690 exportlist = self.rpc.call_export_list(locked_nodes)
8692 for node in exportlist:
8693 msg = exportlist[node].fail_msg
8695 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
8697 if instance_name in exportlist[node].payload:
8699 result = self.rpc.call_export_remove(node, instance_name)
8700 msg = result.fail_msg
8702 logging.error("Could not remove export for instance %s"
8703 " on node %s: %s", instance_name, node, msg)
8705 if fqdn_warn and not found:
8706 feedback_fn("Export not found. If trying to remove an export belonging"
8707 " to a deleted instance please use its Fully Qualified"
8711 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
8714 This is an abstract class which is the parent of all the other tags LUs.
8718 def ExpandNames(self):
8719 self.needed_locks = {}
8720 if self.op.kind == constants.TAG_NODE:
8721 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
8722 self.needed_locks[locking.LEVEL_NODE] = self.op.name
8723 elif self.op.kind == constants.TAG_INSTANCE:
8724 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
8725 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
8727 def CheckPrereq(self):
8728 """Check prerequisites.
8731 if self.op.kind == constants.TAG_CLUSTER:
8732 self.target = self.cfg.GetClusterInfo()
8733 elif self.op.kind == constants.TAG_NODE:
8734 self.target = self.cfg.GetNodeInfo(self.op.name)
8735 elif self.op.kind == constants.TAG_INSTANCE:
8736 self.target = self.cfg.GetInstanceInfo(self.op.name)
8738 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
8739 str(self.op.kind), errors.ECODE_INVAL)
8742 class LUGetTags(TagsLU):
8743 """Returns the tags of a given object.
8746 _OP_REQP = ["kind", "name"]
8749 def Exec(self, feedback_fn):
8750 """Returns the tag list.
8753 return list(self.target.GetTags())
8756 class LUSearchTags(NoHooksLU):
8757 """Searches the tags for a given pattern.
8760 _OP_REQP = ["pattern"]
8763 def ExpandNames(self):
8764 self.needed_locks = {}
8766 def CheckPrereq(self):
8767 """Check prerequisites.
8769 This checks the pattern passed for validity by compiling it.
8773 self.re = re.compile(self.op.pattern)
8774 except re.error, err:
8775 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
8776 (self.op.pattern, err), errors.ECODE_INVAL)
8778 def Exec(self, feedback_fn):
8779 """Returns the tag list.
8783 tgts = [("/cluster", cfg.GetClusterInfo())]
8784 ilist = cfg.GetAllInstancesInfo().values()
8785 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
8786 nlist = cfg.GetAllNodesInfo().values()
8787 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
8789 for path, target in tgts:
8790 for tag in target.GetTags():
8791 if self.re.search(tag):
8792 results.append((path, tag))
8796 class LUAddTags(TagsLU):
8797 """Sets a tag on a given object.
8800 _OP_REQP = ["kind", "name", "tags"]
8803 def CheckPrereq(self):
8804 """Check prerequisites.
8806 This checks the type and length of the tag name and value.
8809 TagsLU.CheckPrereq(self)
8810 for tag in self.op.tags:
8811 objects.TaggableObject.ValidateTag(tag)
8813 def Exec(self, feedback_fn):
8818 for tag in self.op.tags:
8819 self.target.AddTag(tag)
8820 except errors.TagError, err:
8821 raise errors.OpExecError("Error while setting tag: %s" % str(err))
8822 self.cfg.Update(self.target, feedback_fn)
8825 class LUDelTags(TagsLU):
8826 """Delete a list of tags from a given object.
8829 _OP_REQP = ["kind", "name", "tags"]
8832 def CheckPrereq(self):
8833 """Check prerequisites.
8835 This checks that we have the given tag.
8838 TagsLU.CheckPrereq(self)
8839 for tag in self.op.tags:
8840 objects.TaggableObject.ValidateTag(tag)
8841 del_tags = frozenset(self.op.tags)
8842 cur_tags = self.target.GetTags()
8843 if not del_tags <= cur_tags:
8844 diff_tags = del_tags - cur_tags
8845 diff_names = ["'%s'" % tag for tag in diff_tags]
8847 raise errors.OpPrereqError("Tag(s) %s not found" %
8848 (",".join(diff_names)), errors.ECODE_NOENT)
8850 def Exec(self, feedback_fn):
8851 """Remove the tag from the object.
8854 for tag in self.op.tags:
8855 self.target.RemoveTag(tag)
8856 self.cfg.Update(self.target, feedback_fn)
8859 class LUTestDelay(NoHooksLU):
8860 """Sleep for a specified amount of time.
8862 This LU sleeps on the master and/or nodes for a specified amount of
8866 _OP_REQP = ["duration", "on_master", "on_nodes"]
8869 def ExpandNames(self):
8870 """Expand names and set required locks.
8872 This expands the node list, if any.
8875 self.needed_locks = {}
8876 if self.op.on_nodes:
8877 # _GetWantedNodes can be used here, but is not always appropriate to use
8878 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
8880 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
8881 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
8883 def CheckPrereq(self):
8884 """Check prerequisites.
8888 def Exec(self, feedback_fn):
8889 """Do the actual sleep.
8892 if self.op.on_master:
8893 if not utils.TestDelay(self.op.duration):
8894 raise errors.OpExecError("Error during master delay test")
8895 if self.op.on_nodes:
8896 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
8897 for node, node_result in result.items():
8898 node_result.Raise("Failure during rpc call to node %s" % node)
8901 class IAllocator(object):
8902 """IAllocator framework.
8904 An IAllocator instance has four sets of attributes:
8905 - cfg that is needed to query the cluster
8906 - input data (all members of the _KEYS class attribute are required)
8907 - four buffer attributes (in|out_data|text), that represent the
8908 input (to the external script) in text and data structure format,
8909 and the output from it, again in two formats
8910 - the result variables from the script (success, info, nodes) for
8914 # pylint: disable-msg=R0902
8915 # lots of instance attributes
8917 "name", "mem_size", "disks", "disk_template",
8918 "os", "tags", "nics", "vcpus", "hypervisor",
8921 "name", "relocate_from",
8927 def __init__(self, cfg, rpc, mode, **kwargs):
8930 # init buffer variables
8931 self.in_text = self.out_text = self.in_data = self.out_data = None
8932 # init all input fields so that pylint is happy
8934 self.mem_size = self.disks = self.disk_template = None
8935 self.os = self.tags = self.nics = self.vcpus = None
8936 self.hypervisor = None
8937 self.relocate_from = None
8939 self.evac_nodes = None
8941 self.required_nodes = None
8942 # init result fields
8943 self.success = self.info = self.result = None
8944 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8945 keyset = self._ALLO_KEYS
8946 fn = self._AddNewInstance
8947 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8948 keyset = self._RELO_KEYS
8949 fn = self._AddRelocateInstance
8950 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
8951 keyset = self._EVAC_KEYS
8952 fn = self._AddEvacuateNodes
8954 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8955 " IAllocator" % self.mode)
8957 if key not in keyset:
8958 raise errors.ProgrammerError("Invalid input parameter '%s' to"
8959 " IAllocator" % key)
8960 setattr(self, key, kwargs[key])
8963 if key not in kwargs:
8964 raise errors.ProgrammerError("Missing input parameter '%s' to"
8965 " IAllocator" % key)
8966 self._BuildInputData(fn)
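# Typical usage, following the callers earlier in this module (sketch):
#   ial = IAllocator(self.cfg, self.rpc,
#                    mode=constants.IALLOCATOR_MODE_MEVAC,
#                    evac_nodes=self.op.nodes)
#   ial.Run(self.op.iallocator, validate=True)
#   if not ial.success:
#     ...report ial.info...
#   ...use ial.result...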
8968 def _ComputeClusterData(self):
8969 """Compute the generic allocator input data.
8971 This is the data that is independent of the actual operation.
8975 cluster_info = cfg.GetClusterInfo()
8978 "version": constants.IALLOCATOR_VERSION,
8979 "cluster_name": cfg.GetClusterName(),
8980 "cluster_tags": list(cluster_info.GetTags()),
8981 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8982 # we don't have job IDs
8984 iinfo = cfg.GetAllInstancesInfo().values()
8985 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8989 node_list = cfg.GetNodeList()
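# which hypervisor the nodes are queried for depends on the request mode:
# the explicitly requested one for allocations, the instance's own for
# relocations, and the cluster's first enabled one for node evacuation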
8991 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8992 hypervisor_name = self.hypervisor
8993 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8994 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
8995 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
8996 hypervisor_name = cluster_info.enabled_hypervisors[0]
8998 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9001 self.rpc.call_all_instances_info(node_list,
9002 cluster_info.enabled_hypervisors)
9003 for nname, nresult in node_data.items():
9004 # first fill in static (config-based) values
9005 ninfo = cfg.GetNodeInfo(nname)
9007 "tags": list(ninfo.GetTags()),
9008 "primary_ip": ninfo.primary_ip,
9009 "secondary_ip": ninfo.secondary_ip,
9010 "offline": ninfo.offline,
9011 "drained": ninfo.drained,
9012 "master_candidate": ninfo.master_candidate,
9015 if not (ninfo.offline or ninfo.drained):
9016 nresult.Raise("Can't get data for node %s" % nname)
9017 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9019 remote_info = nresult.payload
9021 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9022 'vg_size', 'vg_free', 'cpu_total']:
9023 if attr not in remote_info:
9024 raise errors.OpExecError("Node '%s' didn't return attribute"
9025 " '%s'" % (nname, attr))
9026 if not isinstance(remote_info[attr], int):
9027 raise errors.OpExecError("Node '%s' returned invalid value"
9029 (nname, attr, remote_info[attr]))
9030 # compute memory used by primary instances
9031 i_p_mem = i_p_up_mem = 0
9032 for iinfo, beinfo in i_list:
9033 if iinfo.primary_node == nname:
9034 i_p_mem += beinfo[constants.BE_MEMORY]
9035 if iinfo.name not in node_iinfo[nname].payload:
9038 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9039 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9040 remote_info['memory_free'] -= max(0, i_mem_diff)
9043 i_p_up_mem += beinfo[constants.BE_MEMORY]
9045 # compute memory used by instances
9047 "total_memory": remote_info['memory_total'],
9048 "reserved_memory": remote_info['memory_dom0'],
9049 "free_memory": remote_info['memory_free'],
9050 "total_disk": remote_info['vg_size'],
9051 "free_disk": remote_info['vg_free'],
9052 "total_cpus": remote_info['cpu_total'],
9053 "i_pri_memory": i_p_mem,
9054 "i_pri_up_memory": i_p_up_mem,
9058 node_results[nname] = pnr
9059 data["nodes"] = node_results
9063 for iinfo, beinfo in i_list:
9065 for nic in iinfo.nics:
9066 filled_params = objects.FillDict(
9067 cluster_info.nicparams[constants.PP_DEFAULT],
9069 nic_dict = {"mac": nic.mac,
9071 "mode": filled_params[constants.NIC_MODE],
9072 "link": filled_params[constants.NIC_LINK],
9074 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9075 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9076 nic_data.append(nic_dict)
9078 "tags": list(iinfo.GetTags()),
9079 "admin_up": iinfo.admin_up,
9080 "vcpus": beinfo[constants.BE_VCPUS],
9081 "memory": beinfo[constants.BE_MEMORY],
9083 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9085 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9086 "disk_template": iinfo.disk_template,
9087 "hypervisor": iinfo.hypervisor,
9089 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9091 instance_data[iinfo.name] = pir
9093 data["instances"] = instance_data
9097 def _AddNewInstance(self):
9098 """Add new instance data to allocator structure.
9100 This in combination with _ComputeClusterData will create the
9101 correct structure needed as input for the allocator.
9103 The checks for the completeness of the opcode must have already been
9107 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9109 if self.disk_template in constants.DTS_NET_MIRROR:
9110 self.required_nodes = 2
9112 self.required_nodes = 1
9115 "disk_template": self.disk_template,
9118 "vcpus": self.vcpus,
9119 "memory": self.mem_size,
9120 "disks": self.disks,
9121 "disk_space_total": disk_space,
9123 "required_nodes": self.required_nodes,
9127 def _AddRelocateInstance(self):
9128 """Add relocate instance data to allocator structure.
9130 This in combination with _ComputeClusterData will create the
9131 correct structure needed as input for the allocator.
9133 The checks for the completeness of the opcode must have already been
9137 instance = self.cfg.GetInstanceInfo(self.name)
9138 if instance is None:
9139 raise errors.ProgrammerError("Unknown instance '%s' passed to"
9140 " IAllocator" % self.name)
9142 if instance.disk_template not in constants.DTS_NET_MIRROR:
9143 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9146 if len(instance.secondary_nodes) != 1:
9147 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
9150 self.required_nodes = 1
9151 disk_sizes = [{'size': disk.size} for disk in instance.disks]
9152 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9156 "disk_space_total": disk_space,
9157 "required_nodes": self.required_nodes,
9158 "relocate_from": self.relocate_from,
9162 def _AddEvacuateNodes(self):
9163 """Add evacuate nodes data to allocator structure.
9167 "evac_nodes": self.evac_nodes
9171 def _BuildInputData(self, fn):
9172 """Build input data structures.
9175 self._ComputeClusterData()
9178 request["type"] = self.mode
9179 self.in_data["request"] = request
9181 self.in_text = serializer.Dump(self.in_data)
9183 def Run(self, name, validate=True, call_fn=None):
9184 """Run an instance allocator and return the results.
9188 call_fn = self.rpc.call_iallocator_runner
9190 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9191 result.Raise("Failure while running the iallocator script")
9193 self.out_text = result.payload
9195 self._ValidateResult()
9197 def _ValidateResult(self):
9198 """Process the allocator results.
9200 This will process and if successful save the result in
9201 self.out_data and the other parameters.
9205 rdict = serializer.Load(self.out_text)
9206 except Exception, err:
9207 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9209 if not isinstance(rdict, dict):
9210 raise errors.OpExecError("Can't parse iallocator results: not a dict")
9212 # TODO: remove backwards compatibility in later versions
9213 if "nodes" in rdict and "result" not in rdict:
9214 rdict["result"] = rdict["nodes"]
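# whatever the mode, these three keys must be present in the answer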
9217 for key in "success", "info", "result":
9218 if key not in rdict:
9219 raise errors.OpExecError("Can't parse iallocator results:"
9220 " missing key '%s'" % key)
9221 setattr(self, key, rdict[key])
9223 if not isinstance(rdict["result"], list):
9224 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9226 self.out_data = rdict
9229 class LUTestAllocator(NoHooksLU):
9230 """Run allocator tests.
9232 This LU runs the allocator tests
9235 _OP_REQP = ["direction", "mode", "name"]
9237 def CheckPrereq(self):
9238 """Check prerequisites.
9240 This checks the opcode parameters depending on the direction and mode of the test.
9243 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9244 for attr in ["name", "mem_size", "disks", "disk_template",
9245 "os", "tags", "nics", "vcpus"]:
9246 if not hasattr(self.op, attr):
9247 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
9248 attr, errors.ECODE_INVAL)
9249 iname = self.cfg.ExpandInstanceName(self.op.name)
9250 if iname is not None:
9251 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
9252 iname, errors.ECODE_EXISTS)
9253 if not isinstance(self.op.nics, list):
9254 raise errors.OpPrereqError("Invalid parameter 'nics'",
9256 for row in self.op.nics:
9257 if (not isinstance(row, dict) or
9260 "bridge" not in row):
9261 raise errors.OpPrereqError("Invalid contents of the 'nics'"
9262 " parameter", errors.ECODE_INVAL)
9263 if not isinstance(self.op.disks, list):
9264 raise errors.OpPrereqError("Invalid parameter 'disks'",
9266 for row in self.op.disks:
9267 if (not isinstance(row, dict) or
9268 "size" not in row or
9269 not isinstance(row["size"], int) or
9270 "mode" not in row or
9271 row["mode"] not in ['r', 'w']):
9272 raise errors.OpPrereqError("Invalid contents of the 'disks'"
9273 " parameter", errors.ECODE_INVAL)
9274 if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
9275 self.op.hypervisor = self.cfg.GetHypervisorType()
9276 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9277 if not hasattr(self.op, "name"):
9278 raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
9280 fname = _ExpandInstanceName(self.cfg, self.op.name)
9281 self.op.name = fname
9282 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
9283 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9284 if not hasattr(self.op, "evac_nodes"):
9285 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
9286 " opcode input", errors.ECODE_INVAL)
9288 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
9289 self.op.mode, errors.ECODE_INVAL)
9291 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
9292 if not hasattr(self.op, "allocator") or self.op.allocator is None:
9293 raise errors.OpPrereqError("Missing allocator name",
9295 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
9296 raise errors.OpPrereqError("Wrong allocator test '%s'" %
9297 self.op.direction, errors.ECODE_INVAL)
9299 def Exec(self, feedback_fn):
9300 """Run the allocator test.
9303 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9304 ial = IAllocator(self.cfg, self.rpc,
9307 mem_size=self.op.mem_size,
9308 disks=self.op.disks,
9309 disk_template=self.op.disk_template,
9313 vcpus=self.op.vcpus,
9314 hypervisor=self.op.hypervisor,
9316 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9317 ial = IAllocator(self.cfg, self.rpc,
9320 relocate_from=list(self.relocate_from),
9322 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9323 ial = IAllocator(self.cfg, self.rpc,
9325 evac_nodes=self.op.evac_nodes)
9327 raise errors.ProgrammerError("Unhandled mode %s in"
9328 " LUTestAllocator.Exec", self.op.mode)
9330 if self.op.direction == constants.IALLOCATOR_DIR_IN:
9331 result = ial.in_text
9333 ial.Run(self.op.allocator, validate=False)
9334 result = ial.out_text