code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 import os
  30 import os.path
  31 import time
  32 import re
  33 import platform
  34 import logging
  35 import copy
  36 import OpenSSL
  37
  38 from ganeti import ssh
  39 from ganeti import utils
  40 from ganeti import errors
  41 from ganeti import hypervisor
  42 from ganeti import locking
  43 from ganeti import constants
  44 from ganeti import objects
  45 from ganeti import serializer
  46 from ganeti import ssconf
  47 from ganeti import uidpool
  48 from ganeti import compat
  49
  50
  51 class LogicalUnit(object):
  52   """Logical Unit base class.
  53
  54   Subclasses must follow these rules:
  55     - implement ExpandNames
  56     - implement CheckPrereq (except when tasklets are used)
  57     - implement Exec (except when tasklets are used)
  58     - implement BuildHooksEnv
  59     - redefine HPATH and HTYPE
  60     - optionally redefine their run requirements:
  61         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  62
  63   Note that all commands require root permissions.
  64
  65   @ivar dry_run_result: the value (if any) that will be returned to the caller
  66       in dry-run mode (signalled by opcode dry_run parameter)
  67
  68   """
  69   HPATH = None
  70   HTYPE = None
  71   _OP_REQP = []
  72   REQ_BGL = True
  73
  74   def __init__(self, processor, op, context, rpc):
  75     """Constructor for LogicalUnit.
  76
  77     This needs to be overridden in derived classes in order to check op
  78     validity.
  79
  80     """
  81     self.proc = processor
  82     self.op = op
  83     self.cfg = context.cfg
  84     self.context = context
  85     self.rpc = rpc
  86     # Dicts used to declare locking needs to mcpu
  87     self.needed_locks = None
  88     self.acquired_locks = {}
  89     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
  90     self.add_locks = {}
  91     self.remove_locks = {}
  92     # Used to force good behavior when calling helper functions
  93     self.recalculate_locks = {}
  94     self.__ssh = None
  95     # logging
  96     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
  97     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
  98     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
  99     # support for dry-run
 100     self.dry_run_result = None
 101     # support for generic debug attribute
 102     if (not hasattr(self.op, "debug_level") or
 103         not isinstance(self.op.debug_level, int)):
 104       self.op.debug_level = 0
 105
 106     # Tasklets
 107     self.tasklets = None
 108
 109     for attr_name in self._OP_REQP:
 110       attr_val = getattr(op, attr_name, None)
 111       if attr_val is None:
 112         raise errors.OpPrereqError("Required parameter '%s' missing" %
 113                                    attr_name, errors.ECODE_INVAL)
 114
 115     self.CheckArguments()
 116
 117   def __GetSSH(self):
 118     """Returns the SshRunner object
 119
 120     """
 121     if not self.__ssh:
 122       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 123     return self.__ssh
 124
 125   ssh = property(fget=__GetSSH)
 126
 127   def CheckArguments(self):
 128     """Check syntactic validity for the opcode arguments.
 129
 130     This method is for doing a simple syntactic check and ensure
 131     validity of opcode parameters, without any cluster-related
 132     checks. While the same can be accomplished in ExpandNames and/or
 133     CheckPrereq, doing these separate is better because:
 134
 135       - ExpandNames is left as as purely a lock-related function
 136       - CheckPrereq is run after we have acquired locks (and possible
 137         waited for them)
 138
 139     The function is allowed to change the self.op attribute so that
 140     later methods can no longer worry about missing parameters.
 141
 142     """
 143     pass
 144
 145   def ExpandNames(self):
 146     """Expand names for this LU.
 147
 148     This method is called before starting to execute the opcode, and it should
 149     update all the parameters of the opcode to their canonical form (e.g. a
 150     short node name must be fully expanded after this method has successfully
 151     completed). This way locking, hooks, logging, ecc. can work correctly.
 152
 153     LUs which implement this method must also populate the self.needed_locks
 154     member, as a dict with lock levels as keys, and a list of needed lock names
 155     as values. Rules:
 156
 157       - use an empty dict if you don't need any lock
 158       - if you don't need any lock at a particular level omit that level
 159       - don't put anything for the BGL level
 160       - if you want all locks at a level use locking.ALL_SET as a value
 161
 162     If you need to share locks (rather than acquire them exclusively) at one
 163     level you can modify self.share_locks, setting a true value (usually 1) for
 164     that level. By default locks are not shared.
 165
 166     This function can also define a list of tasklets, which then will be
 167     executed in order instead of the usual LU-level CheckPrereq and Exec
 168     functions, if those are not defined by the LU.
 169
 170     Examples::
 171
 172       # Acquire all nodes and one instance
 173       self.needed_locks = {
 174         locking.LEVEL_NODE: locking.ALL_SET,
 175         locking.LEVEL_INSTANCE: ['instance1.example.tld'],
 176       }
 177       # Acquire just two nodes
 178       self.needed_locks = {
 179         locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
 180       }
 181       # Acquire no locks
 182       self.needed_locks = {} # No, you can't leave it to the default value None
 183
 184     """
 185     # The implementation of this method is mandatory only if the new LU is
 186     # concurrent, so that old LUs don't need to be changed all at the same
 187     # time.
 188     if self.REQ_BGL:
 189       self.needed_locks = {} # Exclusive LUs don't need locks.
 190     else:
 191       raise NotImplementedError
 192
 193   def DeclareLocks(self, level):
 194     """Declare LU locking needs for a level
 195
 196     While most LUs can just declare their locking needs at ExpandNames time,
 197     sometimes there's the need to calculate some locks after having acquired
 198     the ones before. This function is called just before acquiring locks at a
 199     particular level, but after acquiring the ones at lower levels, and permits
 200     such calculations. It can be used to modify self.needed_locks, and by
 201     default it does nothing.
 202
 203     This function is only called if you have something already set in
 204     self.needed_locks for the level.
 205
 206     @param level: Locking level which is going to be locked
 207     @type level: member of ganeti.locking.LEVELS
 208
 209     """
 210
 211   def CheckPrereq(self):
 212     """Check prerequisites for this LU.
 213
 214     This method should check that the prerequisites for the execution
 215     of this LU are fulfilled. It can do internode communication, but
 216     it should be idempotent - no cluster or system changes are
 217     allowed.
 218
 219     The method should raise errors.OpPrereqError in case something is
 220     not fulfilled. Its return value is ignored.
 221
 222     This method should also update all the parameters of the opcode to
 223     their canonical form if it hasn't been done by ExpandNames before.
 224
 225     """
 226     if self.tasklets is not None:
 227       for (idx, tl) in enumerate(self.tasklets):
 228         logging.debug("Checking prerequisites for tasklet %s/%s",
 229                       idx + 1, len(self.tasklets))
 230         tl.CheckPrereq()
 231     else:
 232       raise NotImplementedError
 233
 234   def Exec(self, feedback_fn):
 235     """Execute the LU.
 236
 237     This method should implement the actual work. It should raise
 238     errors.OpExecError for failures that are somewhat dealt with in
 239     code, or expected.
 240
 241     """
 242     if self.tasklets is not None:
 243       for (idx, tl) in enumerate(self.tasklets):
 244         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 245         tl.Exec(feedback_fn)
 246     else:
 247       raise NotImplementedError
 248
 249   def BuildHooksEnv(self):
 250     """Build hooks environment for this LU.
 251
 252     This method should return a three-node tuple consisting of: a dict
 253     containing the environment that will be used for running the
 254     specific hook for this LU, a list of node names on which the hook
 255     should run before the execution, and a list of node names on which
 256     the hook should run after the execution.
 257
 258     The keys of the dict must not have 'GANETI_' prefixed as this will
 259     be handled in the hooks runner. Also note additional keys will be
 260     added by the hooks runner. If the LU doesn't define any
 261     environment, an empty dict (and not None) should be returned.
 262
 263     No nodes should be returned as an empty list (and not None).
 264
 265     Note that if the HPATH for a LU class is None, this function will
 266     not be called.
 267
 268     """
 269     raise NotImplementedError
 270
 271   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 272     """Notify the LU about the results of its hooks.
 273
 274     This method is called every time a hooks phase is executed, and notifies
 275     the Logical Unit about the hooks' result. The LU can then use it to alter
 276     its result based on the hooks.  By default the method does nothing and the
 277     previous result is passed back unchanged but any LU can define it if it
 278     wants to use the local cluster hook-scripts somehow.
 279
 280     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 281         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 282     @param hook_results: the results of the multi-node hooks rpc call
 283     @param feedback_fn: function used send feedback back to the caller
 284     @param lu_result: the previous Exec result this LU had, or None
 285         in the PRE phase
 286     @return: the new Exec result, based on the previous result
 287         and hook results
 288
 289     """
 290     # API must be kept, thus we ignore the unused argument and could
 291     # be a function warnings
 292     # pylint: disable-msg=W0613,R0201
 293     return lu_result
 294
 295   def _ExpandAndLockInstance(self):
 296     """Helper function to expand and lock an instance.
 297
 298     Many LUs that work on an instance take its name in self.op.instance_name
 299     and need to expand it and then declare the expanded name for locking. This
 300     function does it, and then updates self.op.instance_name to the expanded
 301     name. It also initializes needed_locks as a dict, if this hasn't been done
 302     before.
 303
 304     """
 305     if self.needed_locks is None:
 306       self.needed_locks = {}
 307     else:
 308       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 309         "_ExpandAndLockInstance called with instance-level locks set"
 310     self.op.instance_name = _ExpandInstanceName(self.cfg,
 311                                                 self.op.instance_name)
 312     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 313
 314   def _LockInstancesNodes(self, primary_only=False):
 315     """Helper function to declare instances' nodes for locking.
 316
 317     This function should be called after locking one or more instances to lock
 318     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 319     with all primary or secondary nodes for instances already locked and
 320     present in self.needed_locks[locking.LEVEL_INSTANCE].
 321
 322     It should be called from DeclareLocks, and for safety only works if
 323     self.recalculate_locks[locking.LEVEL_NODE] is set.
 324
 325     In the future it may grow parameters to just lock some instance's nodes, or
 326     to just lock primaries or secondary nodes, if needed.
 327
 328     If should be called in DeclareLocks in a way similar to::
 329
 330       if level == locking.LEVEL_NODE:
 331         self._LockInstancesNodes()
 332
 333     @type primary_only: boolean
 334     @param primary_only: only lock primary nodes of locked instances
 335
 336     """
 337     assert locking.LEVEL_NODE in self.recalculate_locks, \
 338       "_LockInstancesNodes helper function called with no nodes to recalculate"
 339
 340     # TODO: check if we're really been called with the instance locks held
 341
 342     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 343     # future we might want to have different behaviors depending on the value
 344     # of self.recalculate_locks[locking.LEVEL_NODE]
 345     wanted_nodes = []
 346     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 347       instance = self.context.cfg.GetInstanceInfo(instance_name)
 348       wanted_nodes.append(instance.primary_node)
 349       if not primary_only:
 350         wanted_nodes.extend(instance.secondary_nodes)
 351
 352     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 353       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 354     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 355       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 356
 357     del self.recalculate_locks[locking.LEVEL_NODE]
 358
 359
 360 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 361   """Simple LU which runs no hooks.
 362
 363   This LU is intended as a parent for other LogicalUnits which will
 364   run no hooks, in order to reduce duplicate code.
 365
 366   """
 367   HPATH = None
 368   HTYPE = None
 369
 370   def BuildHooksEnv(self):
 371     """Empty BuildHooksEnv for NoHooksLu.
 372
 373     This just raises an error.
 374
 375     """
 376     assert False, "BuildHooksEnv called for NoHooksLUs"
 377
 378
 379 class Tasklet:
 380   """Tasklet base class.
 381
 382   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 383   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 384   tasklets know nothing about locks.
 385
 386   Subclasses must follow these rules:
 387     - Implement CheckPrereq
 388     - Implement Exec
 389
 390   """
 391   def __init__(self, lu):
 392     self.lu = lu
 393
 394     # Shortcuts
 395     self.cfg = lu.cfg
 396     self.rpc = lu.rpc
 397
 398   def CheckPrereq(self):
 399     """Check prerequisites for this tasklets.
 400
 401     This method should check whether the prerequisites for the execution of
 402     this tasklet are fulfilled. It can do internode communication, but it
 403     should be idempotent - no cluster or system changes are allowed.
 404
 405     The method should raise errors.OpPrereqError in case something is not
 406     fulfilled. Its return value is ignored.
 407
 408     This method should also update all parameters to their canonical form if it
 409     hasn't been done before.
 410
 411     """
 412     raise NotImplementedError
 413
 414   def Exec(self, feedback_fn):
 415     """Execute the tasklet.
 416
 417     This method should implement the actual work. It should raise
 418     errors.OpExecError for failures that are somewhat dealt with in code, or
 419     expected.
 420
 421     """
 422     raise NotImplementedError
 423
 424
 425 def _GetWantedNodes(lu, nodes):
 426   """Returns list of checked and expanded node names.
 427
 428   @type lu: L{LogicalUnit}
 429   @param lu: the logical unit on whose behalf we execute
 430   @type nodes: list
 431   @param nodes: list of node names or None for all nodes
 432   @rtype: list
 433   @return: the list of nodes, sorted
 434   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 435
 436   """
 437   if not isinstance(nodes, list):
 438     raise errors.OpPrereqError("Invalid argument type 'nodes'",
 439                                errors.ECODE_INVAL)
 440
 441   if not nodes:
 442     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 443       " non-empty list of nodes whose name is to be expanded.")
 444
 445   wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
 446   return utils.NiceSort(wanted)
 447
 448
 449 def _GetWantedInstances(lu, instances):
 450   """Returns list of checked and expanded instance names.
 451
 452   @type lu: L{LogicalUnit}
 453   @param lu: the logical unit on whose behalf we execute
 454   @type instances: list
 455   @param instances: list of instance names or None for all instances
 456   @rtype: list
 457   @return: the list of instances, sorted
 458   @raise errors.OpPrereqError: if the instances parameter is wrong type
 459   @raise errors.OpPrereqError: if any of the passed instances is not found
 460
 461   """
 462   if not isinstance(instances, list):
 463     raise errors.OpPrereqError("Invalid argument type 'instances'",
 464                                errors.ECODE_INVAL)
 465
 466   if instances:
 467     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 468   else:
 469     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 470   return wanted
 471
 472
 473 def _CheckOutputFields(static, dynamic, selected):
 474   """Checks whether all selected fields are valid.
 475
 476   @type static: L{utils.FieldSet}
 477   @param static: static fields set
 478   @type dynamic: L{utils.FieldSet}
 479   @param dynamic: dynamic fields set
 480
 481   """
 482   f = utils.FieldSet()
 483   f.Extend(static)
 484   f.Extend(dynamic)
 485
 486   delta = f.NonMatching(selected)
 487   if delta:
 488     raise errors.OpPrereqError("Unknown output fields selected: %s"
 489                                % ",".join(delta), errors.ECODE_INVAL)
 490
 491
 492 def _CheckBooleanOpField(op, name):
 493   """Validates boolean opcode parameters.
 494
 495   This will ensure that an opcode parameter is either a boolean value,
 496   or None (but that it always exists).
 497
 498   """
 499   val = getattr(op, name, None)
 500   if not (val is None or isinstance(val, bool)):
 501     raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
 502                                (name, str(val)), errors.ECODE_INVAL)
 503   setattr(op, name, val)
 504
 505
 506 def _CheckGlobalHvParams(params):
 507   """Validates that given hypervisor params are not global ones.
 508
 509   This will ensure that instances don't get customised versions of
 510   global params.
 511
 512   """
 513   used_globals = constants.HVC_GLOBALS.intersection(params)
 514   if used_globals:
 515     msg = ("The following hypervisor parameters are global and cannot"
 516            " be customized at instance level, please modify them at"
 517            " cluster level: %s" % utils.CommaJoin(used_globals))
 518     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 519
 520
 521 def _CheckNodeOnline(lu, node):
 522   """Ensure that a given node is online.
 523
 524   @param lu: the LU on behalf of which we make the check
 525   @param node: the node to check
 526   @raise errors.OpPrereqError: if the node is offline
 527
 528   """
 529   if lu.cfg.GetNodeInfo(node).offline:
 530     raise errors.OpPrereqError("Can't use offline node %s" % node,
 531                                errors.ECODE_INVAL)
 532
 533
 534 def _CheckNodeNotDrained(lu, node):
 535   """Ensure that a given node is not drained.
 536
 537   @param lu: the LU on behalf of which we make the check
 538   @param node: the node to check
 539   @raise errors.OpPrereqError: if the node is drained
 540
 541   """
 542   if lu.cfg.GetNodeInfo(node).drained:
 543     raise errors.OpPrereqError("Can't use drained node %s" % node,
 544                                errors.ECODE_INVAL)
 545
 546
 547 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 548   """Ensure that a node supports a given OS.
 549
 550   @param lu: the LU on behalf of which we make the check
 551   @param node: the node to check
 552   @param os_name: the OS to query about
 553   @param force_variant: whether to ignore variant errors
 554   @raise errors.OpPrereqError: if the node is not supporting the OS
 555
 556   """
 557   result = lu.rpc.call_os_get(node, os_name)
 558   result.Raise("OS '%s' not in supported OS list for node %s" %
 559                (os_name, node),
 560                prereq=True, ecode=errors.ECODE_INVAL)
 561   if not force_variant:
 562     _CheckOSVariant(result.payload, os_name)
 563
 564
 565 def _RequireFileStorage():
 566   """Checks that file storage is enabled.
 567
 568   @raise errors.OpPrereqError: when file storage is disabled
 569
 570   """
 571   if not constants.ENABLE_FILE_STORAGE:
 572     raise errors.OpPrereqError("File storage disabled at configure time",
 573                                errors.ECODE_INVAL)
 574
 575
 576 def _CheckDiskTemplate(template):
 577   """Ensure a given disk template is valid.
 578
 579   """
 580   if template not in constants.DISK_TEMPLATES:
 581     msg = ("Invalid disk template name '%s', valid templates are: %s" %
 582            (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
 583     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 584   if template == constants.DT_FILE:
 585     _RequireFileStorage()
 586
 587
 588 def _CheckStorageType(storage_type):
 589   """Ensure a given storage type is valid.
 590
 591   """
 592   if storage_type not in constants.VALID_STORAGE_TYPES:
 593     raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
 594                                errors.ECODE_INVAL)
 595   if storage_type == constants.ST_FILE:
 596     _RequireFileStorage()
 597
 598
 599
 600 def _CheckInstanceDown(lu, instance, reason):
 601   """Ensure that an instance is not running."""
 602   if instance.admin_up:
 603     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 604                                (instance.name, reason), errors.ECODE_STATE)
 605
 606   pnode = instance.primary_node
 607   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 608   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 609               prereq=True, ecode=errors.ECODE_ENVIRON)
 610
 611   if instance.name in ins_l.payload:
 612     raise errors.OpPrereqError("Instance %s is running, %s" %
 613                                (instance.name, reason), errors.ECODE_STATE)
 614
 615
 616 def _ExpandItemName(fn, name, kind):
 617   """Expand an item name.
 618
 619   @param fn: the function to use for expansion
 620   @param name: requested item name
 621   @param kind: text description ('Node' or 'Instance')
 622   @return: the resolved (full) name
 623   @raise errors.OpPrereqError: if the item is not found
 624
 625   """
 626   full_name = fn(name)
 627   if full_name is None:
 628     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 629                                errors.ECODE_NOENT)
 630   return full_name
 631
 632
 633 def _ExpandNodeName(cfg, name):
 634   """Wrapper over L{_ExpandItemName} for nodes."""
 635   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 636
 637
 638 def _ExpandInstanceName(cfg, name):
 639   """Wrapper over L{_ExpandItemName} for instance."""
 640   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 641
 642
 643 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 644                           memory, vcpus, nics, disk_template, disks,
 645                           bep, hvp, hypervisor_name):
 646   """Builds instance related env variables for hooks
 647
 648   This builds the hook environment from individual variables.
 649
 650   @type name: string
 651   @param name: the name of the instance
 652   @type primary_node: string
 653   @param primary_node: the name of the instance's primary node
 654   @type secondary_nodes: list
 655   @param secondary_nodes: list of secondary nodes as strings
 656   @type os_type: string
 657   @param os_type: the name of the instance's OS
 658   @type status: boolean
 659   @param status: the should_run status of the instance
 660   @type memory: string
 661   @param memory: the memory size of the instance
 662   @type vcpus: string
 663   @param vcpus: the count of VCPUs the instance has
 664   @type nics: list
 665   @param nics: list of tuples (ip, mac, mode, link) representing
 666       the NICs the instance has
 667   @type disk_template: string
 668   @param disk_template: the disk template of the instance
 669   @type disks: list
 670   @param disks: the list of (size, mode) pairs
 671   @type bep: dict
 672   @param bep: the backend parameters for the instance
 673   @type hvp: dict
 674   @param hvp: the hypervisor parameters for the instance
 675   @type hypervisor_name: string
 676   @param hypervisor_name: the hypervisor for the instance
 677   @rtype: dict
 678   @return: the hook environment for this instance
 679
 680   """
 681   if status:
 682     str_status = "up"
 683   else:
 684     str_status = "down"
 685   env = {
 686     "OP_TARGET": name,
 687     "INSTANCE_NAME": name,
 688     "INSTANCE_PRIMARY": primary_node,
 689     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 690     "INSTANCE_OS_TYPE": os_type,
 691     "INSTANCE_STATUS": str_status,
 692     "INSTANCE_MEMORY": memory,
 693     "INSTANCE_VCPUS": vcpus,
 694     "INSTANCE_DISK_TEMPLATE": disk_template,
 695     "INSTANCE_HYPERVISOR": hypervisor_name,
 696   }
 697
 698   if nics:
 699     nic_count = len(nics)
 700     for idx, (ip, mac, mode, link) in enumerate(nics):
 701       if ip is None:
 702         ip = ""
 703       env["INSTANCE_NIC%d_IP" % idx] = ip
 704       env["INSTANCE_NIC%d_MAC" % idx] = mac
 705       env["INSTANCE_NIC%d_MODE" % idx] = mode
 706       env["INSTANCE_NIC%d_LINK" % idx] = link
 707       if mode == constants.NIC_MODE_BRIDGED:
 708         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 709   else:
 710     nic_count = 0
 711
 712   env["INSTANCE_NIC_COUNT"] = nic_count
 713
 714   if disks:
 715     disk_count = len(disks)
 716     for idx, (size, mode) in enumerate(disks):
 717       env["INSTANCE_DISK%d_SIZE" % idx] = size
 718       env["INSTANCE_DISK%d_MODE" % idx] = mode
 719   else:
 720     disk_count = 0
 721
 722   env["INSTANCE_DISK_COUNT"] = disk_count
 723
 724   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 725     for key, value in source.items():
 726       env["INSTANCE_%s_%s" % (kind, key)] = value
 727
 728   return env
 729
 730
 731 def _NICListToTuple(lu, nics):
 732   """Build a list of nic information tuples.
 733
 734   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 735   value in LUQueryInstanceData.
 736
 737   @type lu:  L{LogicalUnit}
 738   @param lu: the logical unit on whose behalf we execute
 739   @type nics: list of L{objects.NIC}
 740   @param nics: list of nics to convert to hooks tuples
 741
 742   """
 743   hooks_nics = []
 744   c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
 745   for nic in nics:
 746     ip = nic.ip
 747     mac = nic.mac
 748     filled_params = objects.FillDict(c_nicparams, nic.nicparams)
 749     mode = filled_params[constants.NIC_MODE]
 750     link = filled_params[constants.NIC_LINK]
 751     hooks_nics.append((ip, mac, mode, link))
 752   return hooks_nics
 753
 754
 755 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 756   """Builds instance related env variables for hooks from an object.
 757
 758   @type lu: L{LogicalUnit}
 759   @param lu: the logical unit on whose behalf we execute
 760   @type instance: L{objects.Instance}
 761   @param instance: the instance for which we should build the
 762       environment
 763   @type override: dict
 764   @param override: dictionary with key/values that will override
 765       our values
 766   @rtype: dict
 767   @return: the hook environment dictionary
 768
 769   """
 770   cluster = lu.cfg.GetClusterInfo()
 771   bep = cluster.FillBE(instance)
 772   hvp = cluster.FillHV(instance)
 773   args = {
 774     'name': instance.name,
 775     'primary_node': instance.primary_node,
 776     'secondary_nodes': instance.secondary_nodes,
 777     'os_type': instance.os,
 778     'status': instance.admin_up,
 779     'memory': bep[constants.BE_MEMORY],
 780     'vcpus': bep[constants.BE_VCPUS],
 781     'nics': _NICListToTuple(lu, instance.nics),
 782     'disk_template': instance.disk_template,
 783     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 784     'bep': bep,
 785     'hvp': hvp,
 786     'hypervisor_name': instance.hypervisor,
 787   }
 788   if override:
 789     args.update(override)
 790   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 791
 792
 793 def _AdjustCandidatePool(lu, exceptions):
 794   """Adjust the candidate pool after node operations.
 795
 796   """
 797   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 798   if mod_list:
 799     lu.LogInfo("Promoted nodes to master candidate role: %s",
 800                utils.CommaJoin(node.name for node in mod_list))
 801     for name in mod_list:
 802       lu.context.ReaddNode(name)
 803   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 804   if mc_now > mc_max:
 805     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 806                (mc_now, mc_max))
 807
 808
 809 def _DecideSelfPromotion(lu, exceptions=None):
 810   """Decide whether I should promote myself as a master candidate.
 811
 812   """
 813   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 814   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 815   # the new node will increase mc_max with one, so:
 816   mc_should = min(mc_should + 1, cp_size)
 817   return mc_now < mc_should
 818
 819
 820 def _CheckNicsBridgesExist(lu, target_nics, target_node,
 821                                profile=constants.PP_DEFAULT):
 822   """Check that the brigdes needed by a list of nics exist.
 823
 824   """
 825   c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
 826   paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
 827                 for nic in target_nics]
 828   brlist = [params[constants.NIC_LINK] for params in paramslist
 829             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 830   if brlist:
 831     result = lu.rpc.call_bridges_exist(target_node, brlist)
 832     result.Raise("Error checking bridges on destination node '%s'" %
 833                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 834
 835
 836 def _CheckInstanceBridgesExist(lu, instance, node=None):
 837   """Check that the brigdes needed by an instance exist.
 838
 839   """
 840   if node is None:
 841     node = instance.primary_node
 842   _CheckNicsBridgesExist(lu, instance.nics, node)
 843
 844
 845 def _CheckOSVariant(os_obj, name):
 846   """Check whether an OS name conforms to the os variants specification.
 847
 848   @type os_obj: L{objects.OS}
 849   @param os_obj: OS object to check
 850   @type name: string
 851   @param name: OS name passed by the user, to check for validity
 852
 853   """
 854   if not os_obj.supported_variants:
 855     return
 856   try:
 857     variant = name.split("+", 1)[1]
 858   except IndexError:
 859     raise errors.OpPrereqError("OS name must include a variant",
 860                                errors.ECODE_INVAL)
 861
 862   if variant not in os_obj.supported_variants:
 863     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
 864
 865
 866 def _GetNodeInstancesInner(cfg, fn):
 867   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
 868
 869
 870 def _GetNodeInstances(cfg, node_name):
 871   """Returns a list of all primary and secondary instances on a node.
 872
 873   """
 874
 875   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
 876
 877
 878 def _GetNodePrimaryInstances(cfg, node_name):
 879   """Returns primary instances on a node.
 880
 881   """
 882   return _GetNodeInstancesInner(cfg,
 883                                 lambda inst: node_name == inst.primary_node)
 884
 885
 886 def _GetNodeSecondaryInstances(cfg, node_name):
 887   """Returns secondary instances on a node.
 888
 889   """
 890   return _GetNodeInstancesInner(cfg,
 891                                 lambda inst: node_name in inst.secondary_nodes)
 892
 893
 894 def _GetStorageTypeArgs(cfg, storage_type):
 895   """Returns the arguments for a storage type.
 896
 897   """
 898   # Special case for file storage
 899   if storage_type == constants.ST_FILE:
 900     # storage.FileStorage wants a list of storage directories
 901     return [[cfg.GetFileStorageDir()]]
 902
 903   return []
 904
 905
 906 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
 907   faulty = []
 908
 909   for dev in instance.disks:
 910     cfg.SetDiskID(dev, node_name)
 911
 912   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
 913   result.Raise("Failed to get disk status from node %s" % node_name,
 914                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 915
 916   for idx, bdev_status in enumerate(result.payload):
 917     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
 918       faulty.append(idx)
 919
 920   return faulty
 921
 922
 923 def _FormatTimestamp(secs):
 924   """Formats a Unix timestamp with the local timezone.
 925
 926   """
 927   return time.strftime("%F %T %Z", time.gmtime(secs))
 928
 929
 930 class LUPostInitCluster(LogicalUnit):
 931   """Logical unit for running hooks after cluster initialization.
 932
 933   """
 934   HPATH = "cluster-init"
 935   HTYPE = constants.HTYPE_CLUSTER
 936   _OP_REQP = []
 937
 938   def BuildHooksEnv(self):
 939     """Build hooks env.
 940
 941     """
 942     env = {"OP_TARGET": self.cfg.GetClusterName()}
 943     mn = self.cfg.GetMasterNode()
 944     return env, [], [mn]
 945
 946   def CheckPrereq(self):
 947     """No prerequisites to check.
 948
 949     """
 950     return True
 951
 952   def Exec(self, feedback_fn):
 953     """Nothing to do.
 954
 955     """
 956     return True
 957
 958
 959 class LUDestroyCluster(LogicalUnit):
 960   """Logical unit for destroying the cluster.
 961
 962   """
 963   HPATH = "cluster-destroy"
 964   HTYPE = constants.HTYPE_CLUSTER
 965   _OP_REQP = []
 966
 967   def BuildHooksEnv(self):
 968     """Build hooks env.
 969
 970     """
 971     env = {"OP_TARGET": self.cfg.GetClusterName()}
 972     return env, [], []
 973
 974   def CheckPrereq(self):
 975     """Check prerequisites.
 976
 977     This checks whether the cluster is empty.
 978
 979     Any errors are signaled by raising errors.OpPrereqError.
 980
 981     """
 982     master = self.cfg.GetMasterNode()
 983
 984     nodelist = self.cfg.GetNodeList()
 985     if len(nodelist) != 1 or nodelist[0] != master:
 986       raise errors.OpPrereqError("There are still %d node(s) in"
 987                                  " this cluster." % (len(nodelist) - 1),
 988                                  errors.ECODE_INVAL)
 989     instancelist = self.cfg.GetInstanceList()
 990     if instancelist:
 991       raise errors.OpPrereqError("There are still %d instance(s) in"
 992                                  " this cluster." % len(instancelist),
 993                                  errors.ECODE_INVAL)
 994
 995   def Exec(self, feedback_fn):
 996     """Destroys the cluster.
 997
 998     """
 999     master = self.cfg.GetMasterNode()
1000     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1001
1002     # Run post hooks on master node before it's removed
1003     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1004     try:
1005       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1006     except:
1007       # pylint: disable-msg=W0702
1008       self.LogWarning("Errors occurred running hooks on %s" % master)
1009
1010     result = self.rpc.call_node_stop_master(master, False)
1011     result.Raise("Could not disable the master role")
1012
1013     if modify_ssh_setup:
1014       priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1015       utils.CreateBackup(priv_key)
1016       utils.CreateBackup(pub_key)
1017
1018     return master
1019
1020
def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
  """Classifies the validity data of a certificate for LUVerifyCluster.

  @param filename: certificate file name, used in the messages only
  @param expired: whether the certificate is already expired
  @param not_before: start of the validity period as timestamp, or None
  @param not_after: end of the validity period as timestamp, or None
  @param now: the current time as timestamp
  @param warn_days: a warning is returned when this many days or fewer
      remain
  @param error_days: an error is returned when this many days or fewer
      remain
  @rtype: tuple
  @return: (error type, message); both are None if there is nothing to
      report

  """
  if expired:
    have_start = not_before is not None
    have_end = not_after is not None

    if have_start and have_end:
      validity = (" (valid from %s to %s)" %
                  (_FormatTimestamp(not_before),
                   _FormatTimestamp(not_after)))
    elif have_start:
      validity = " (valid from %s)" % _FormatTimestamp(not_before)
    elif have_end:
      validity = " (valid until %s)" % _FormatTimestamp(not_after)
    else:
      validity = ""

    return (LUVerifyCluster.ETYPE_ERROR,
            ("Certificate %s is expired" % filename) + validity)

  if not_before is not None and not_before > now:
    return (LUVerifyCluster.ETYPE_WARNING,
            "Certificate %s not yet valid (valid from %s)" %
            (filename, _FormatTimestamp(not_before)))

  if not_after is not None:
    remaining_days = int((not_after - now) / (24 * 3600))
    msg = ("Certificate %s expires in %d days" % (filename, remaining_days))

    # the error threshold is checked first, so it wins over the warning
    for (threshold, etype) in ((error_days, LUVerifyCluster.ETYPE_ERROR),
                               (warn_days, LUVerifyCluster.ETYPE_WARNING)):
      if remaining_days <= threshold:
        return (etype, msg)

  return (None, None)
1058
1059
1060 def _VerifyCertificate(filename):
1061   """Verifies a certificate for LUVerifyCluster.
1062
1063   @type filename: string
1064   @param filename: Path to PEM file
1065
1066   """
1067   try:
1068     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1069                                            utils.ReadFile(filename))
1070   except Exception, err: # pylint: disable-msg=W0703
1071     return (LUVerifyCluster.ETYPE_ERROR,
1072             "Failed to load X509 certificate %s: %s" % (filename, err))
1073
1074   # Depending on the pyOpenSSL version, this can just return (None, None)
1075   (not_before, not_after) = utils.GetX509CertValidity(cert)
1076
1077   return _VerifyCertificateInner(filename, cert.has_expired(),
1078                                  not_before, not_after, time.time())
1079
1080
1081 class LUVerifyCluster(LogicalUnit):
1082   """Verifies the cluster status.
1083
1084   """
1085   HPATH = "cluster-verify"
1086   HTYPE = constants.HTYPE_CLUSTER
1087   _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1088   REQ_BGL = False
1089
1090   TCLUSTER = "cluster"
1091   TNODE = "node"
1092   TINSTANCE = "instance"
1093
1094   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1095   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1096   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1097   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1098   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1099   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1100   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1101   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1102   ENODEDRBD = (TNODE, "ENODEDRBD")
1103   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1104   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1105   ENODEHV = (TNODE, "ENODEHV")
1106   ENODELVM = (TNODE, "ENODELVM")
1107   ENODEN1 = (TNODE, "ENODEN1")
1108   ENODENET = (TNODE, "ENODENET")
1109   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1110   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1111   ENODERPC = (TNODE, "ENODERPC")
1112   ENODESSH = (TNODE, "ENODESSH")
1113   ENODEVERSION = (TNODE, "ENODEVERSION")
1114   ENODESETUP = (TNODE, "ENODESETUP")
1115   ENODETIME = (TNODE, "ENODETIME")
1116
1117   ETYPE_FIELD = "code"
1118   ETYPE_ERROR = "ERROR"
1119   ETYPE_WARNING = "WARNING"
1120
1121   class NodeImage(object):
1122     """A class representing the logical and physical status of a node.
1123
1124     @ivar volumes: a structure as returned from
1125         L{ganeti.backend.GetVolumeList} (runtime)
1126     @ivar instances: a list of running instances (runtime)
1127     @ivar pinst: list of configured primary instances (config)
1128     @ivar sinst: list of configured secondary instances (config)
1129     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1130         of this node (config)
1131     @ivar mfree: free memory, as reported by hypervisor (runtime)
1132     @ivar dfree: free disk, as reported by the node (runtime)
1133     @ivar offline: the offline status (config)
1134     @type rpc_fail: boolean
1135     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1136         not whether the individual keys were correct) (runtime)
1137     @type lvm_fail: boolean
1138     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1139     @type hyp_fail: boolean
1140     @ivar hyp_fail: whether the RPC call didn't return the instance list
1141     @type ghost: boolean
1142     @ivar ghost: whether this is a known node or not (config)
1143
1144     """
1145     def __init__(self, offline=False):
1146       self.volumes = {}
1147       self.instances = []
1148       self.pinst = []
1149       self.sinst = []
1150       self.sbp = {}
1151       self.mfree = 0
1152       self.dfree = 0
1153       self.offline = offline
1154       self.rpc_fail = False
1155       self.lvm_fail = False
1156       self.hyp_fail = False
1157       self.ghost = False
1158
1159   def ExpandNames(self):
1160     self.needed_locks = {
1161       locking.LEVEL_NODE: locking.ALL_SET,
1162       locking.LEVEL_INSTANCE: locking.ALL_SET,
1163     }
1164     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1165
1166   def _Error(self, ecode, item, msg, *args, **kwargs):
1167     """Format an error message.
1168
1169     Based on the opcode's error_codes parameter, either format a
1170     parseable error code, or a simpler error string.
1171
1172     This must be called only from Exec and functions called from Exec.
1173
1174     """
1175     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1176     itype, etxt = ecode
1177     # first complete the msg
1178     if args:
1179       msg = msg % args
1180     # then format the whole message
1181     if self.op.error_codes:
1182       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1183     else:
1184       if item:
1185         item = " " + item
1186       else:
1187         item = ""
1188       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1189     # and finally report it via the feedback_fn
1190     self._feedback_fn("  - %s" % msg)
1191
1192   def _ErrorIf(self, cond, *args, **kwargs):
1193     """Log an error message if the passed condition is True.
1194
1195     """
1196     cond = bool(cond) or self.op.debug_simulate_errors
1197     if cond:
1198       self._Error(*args, **kwargs)
1199     # do not mark the operation as failed for WARN cases only
1200     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1201       self.bad = self.bad or cond
1202
1203   def _VerifyNode(self, ninfo, nresult):
1204     """Run multiple tests against a node.
1205
1206     Test list:
1207
1208       - compares ganeti version
1209       - checks vg existence and size > 20G
1210       - checks config file checksum
1211       - checks ssh to other nodes
1212
1213     @type ninfo: L{objects.Node}
1214     @param ninfo: the node to check
1215     @param nresult: the results from the node
1216     @rtype: boolean
1217     @return: whether overall this call was successful (and we can expect
1218          reasonable values in the respose)
1219
1220     """
1221     node = ninfo.name
1222     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1223
1224     # main result, nresult should be a non-empty dict
1225     test = not nresult or not isinstance(nresult, dict)
1226     _ErrorIf(test, self.ENODERPC, node,
1227                   "unable to verify node: no data returned")
1228     if test:
1229       return False
1230
1231     # compares ganeti version
1232     local_version = constants.PROTOCOL_VERSION
1233     remote_version = nresult.get("version", None)
1234     test = not (remote_version and
1235                 isinstance(remote_version, (list, tuple)) and
1236                 len(remote_version) == 2)
1237     _ErrorIf(test, self.ENODERPC, node,
1238              "connection to node returned invalid data")
1239     if test:
1240       return False
1241
1242     test = local_version != remote_version[0]
1243     _ErrorIf(test, self.ENODEVERSION, node,
1244              "incompatible protocol versions: master %s,"
1245              " node %s", local_version, remote_version[0])
1246     if test:
1247       return False
1248
1249     # node seems compatible, we can actually try to look into its results
1250
1251     # full package version
1252     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1253                   self.ENODEVERSION, node,
1254                   "software version mismatch: master %s, node %s",
1255                   constants.RELEASE_VERSION, remote_version[1],
1256                   code=self.ETYPE_WARNING)
1257
1258     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1259     if isinstance(hyp_result, dict):
1260       for hv_name, hv_result in hyp_result.iteritems():
1261         test = hv_result is not None
1262         _ErrorIf(test, self.ENODEHV, node,
1263                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1264
1265
1266     test = nresult.get(constants.NV_NODESETUP,
1267                            ["Missing NODESETUP results"])
1268     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1269              "; ".join(test))
1270
1271     return True
1272
1273   def _VerifyNodeTime(self, ninfo, nresult,
1274                       nvinfo_starttime, nvinfo_endtime):
1275     """Check the node time.
1276
1277     @type ninfo: L{objects.Node}
1278     @param ninfo: the node to check
1279     @param nresult: the remote results for the node
1280     @param nvinfo_starttime: the start time of the RPC call
1281     @param nvinfo_endtime: the end time of the RPC call
1282
1283     """
1284     node = ninfo.name
1285     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1286
1287     ntime = nresult.get(constants.NV_TIME, None)
1288     try:
1289       ntime_merged = utils.MergeTime(ntime)
1290     except (ValueError, TypeError):
1291       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1292       return
1293
1294     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1295       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1296     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1297       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1298     else:
1299       ntime_diff = None
1300
1301     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1302              "Node time diverges by at least %s from master node time",
1303              ntime_diff)
1304
1305   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1306     """Check the node time.
1307
1308     @type ninfo: L{objects.Node}
1309     @param ninfo: the node to check
1310     @param nresult: the remote results for the node
1311     @param vg_name: the configured VG name
1312
1313     """
1314     if vg_name is None:
1315       return
1316
1317     node = ninfo.name
1318     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1319
1320     # checks vg existence and size > 20G
1321     vglist = nresult.get(constants.NV_VGLIST, None)
1322     test = not vglist
1323     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1324     if not test:
1325       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1326                                             constants.MIN_VG_SIZE)
1327       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1328
1329     # check pv names
1330     pvlist = nresult.get(constants.NV_PVLIST, None)
1331     test = pvlist is None
1332     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1333     if not test:
1334       # check that ':' is not present in PV names, since it's a
1335       # special character for lvcreate (denotes the range of PEs to
1336       # use on the PV)
1337       for _, pvname, owner_vg in pvlist:
1338         test = ":" in pvname
1339         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1340                  " '%s' of VG '%s'", pvname, owner_vg)
1341
1342   def _VerifyNodeNetwork(self, ninfo, nresult):
1343     """Check the node time.
1344
1345     @type ninfo: L{objects.Node}
1346     @param ninfo: the node to check
1347     @param nresult: the remote results for the node
1348
1349     """
1350     node = ninfo.name
1351     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1352
1353     test = constants.NV_NODELIST not in nresult
1354     _ErrorIf(test, self.ENODESSH, node,
1355              "node hasn't returned node ssh connectivity data")
1356     if not test:
1357       if nresult[constants.NV_NODELIST]:
1358         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1359           _ErrorIf(True, self.ENODESSH, node,
1360                    "ssh communication with node '%s': %s", a_node, a_msg)
1361
1362     test = constants.NV_NODENETTEST not in nresult
1363     _ErrorIf(test, self.ENODENET, node,
1364              "node hasn't returned node tcp connectivity data")
1365     if not test:
1366       if nresult[constants.NV_NODENETTEST]:
1367         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1368         for anode in nlist:
1369           _ErrorIf(True, self.ENODENET, node,
1370                    "tcp communication with node '%s': %s",
1371                    anode, nresult[constants.NV_NODENETTEST][anode])
1372
1373   def _VerifyInstance(self, instance, instanceconfig, node_image):
1374     """Verify an instance.
1375
1376     This function checks to see if the required block devices are
1377     available on the instance's node.
1378
1379     """
1380     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1381     node_current = instanceconfig.primary_node
1382
1383     node_vol_should = {}
1384     instanceconfig.MapLVsByNode(node_vol_should)
1385
1386     for node in node_vol_should:
1387       n_img = node_image[node]
1388       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1389         # ignore missing volumes on offline or broken nodes
1390         continue
1391       for volume in node_vol_should[node]:
1392         test = volume not in n_img.volumes
1393         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1394                  "volume %s missing on node %s", volume, node)
1395
1396     if instanceconfig.admin_up:
1397       pri_img = node_image[node_current]
1398       test = instance not in pri_img.instances and not pri_img.offline
1399       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1400                "instance not running on its primary node %s",
1401                node_current)
1402
1403     for node, n_img in node_image.items():
1404       if (not node == node_current):
1405         test = instance in n_img.instances
1406         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1407                  "instance should not run on node %s", node)
1408
1409   def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1410     """Verify if there are any unknown volumes in the cluster.
1411
1412     The .os, .swap and backup volumes are ignored. All other volumes are
1413     reported as unknown.
1414
1415     """
1416     for node, n_img in node_image.items():
1417       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1418         # skip non-healthy nodes
1419         continue
1420       for volume in n_img.volumes:
1421         test = (node not in node_vol_should or
1422                 volume not in node_vol_should[node])
1423         self._ErrorIf(test, self.ENODEORPHANLV, node,
1424                       "volume %s is unknown", volume)
1425
1426   def _VerifyOrphanInstances(self, instancelist, node_image):
1427     """Verify the list of running instances.
1428
1429     This checks what instances are running but unknown to the cluster.
1430
1431     """
1432     for node, n_img in node_image.items():
1433       for o_inst in n_img.instances:
1434         test = o_inst not in instancelist
1435         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1436                       "instance %s on node %s should not exist", o_inst, node)
1437
1438   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1439     """Verify N+1 Memory Resilience.
1440
1441     Check that if one single node dies we can still start all the
1442     instances it was primary for.
1443
1444     """
1445     for node, n_img in node_image.items():
1446       # This code checks that every node which is now listed as
1447       # secondary has enough memory to host all instances it is
1448       # supposed to should a single other node in the cluster fail.
1449       # FIXME: not ready for failover to an arbitrary node
1450       # FIXME: does not support file-backed instances
1451       # WARNING: we currently take into account down instances as well
1452       # as up ones, considering that even if they're down someone
1453       # might want to start them even in the event of a node failure.
1454       for prinode, instances in n_img.sbp.items():
1455         needed_mem = 0
1456         for instance in instances:
1457           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1458           if bep[constants.BE_AUTO_BALANCE]:
1459             needed_mem += bep[constants.BE_MEMORY]
1460         test = n_img.mfree < needed_mem
1461         self._ErrorIf(test, self.ENODEN1, node,
1462                       "not enough memory on to accommodate"
1463                       " failovers should peer node %s fail", prinode)
1464
1465   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1466                        master_files):
1467     """Verifies and computes the node required file checksums.
1468
1469     @type ninfo: L{objects.Node}
1470     @param ninfo: the node to check
1471     @param nresult: the remote results for the node
1472     @param file_list: required list of files
1473     @param local_cksum: dictionary of local files and their checksums
1474     @param master_files: list of files that only masters should have
1475
1476     """
1477     node = ninfo.name
1478     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1479
1480     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1481     test = not isinstance(remote_cksum, dict)
1482     _ErrorIf(test, self.ENODEFILECHECK, node,
1483              "node hasn't returned file checksum data")
1484     if test:
1485       return
1486
1487     for file_name in file_list:
1488       node_is_mc = ninfo.master_candidate
1489       must_have = (file_name not in master_files) or node_is_mc
1490       # missing
1491       test1 = file_name not in remote_cksum
1492       # invalid checksum
1493       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1494       # existing and good
1495       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1496       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1497                "file '%s' missing", file_name)
1498       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1499                "file '%s' has wrong checksum", file_name)
1500       # not candidate and this is not a must-have file
1501       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1502                "file '%s' should not exist on non master"
1503                " candidates (and the file is outdated)", file_name)
1504       # all good, except non-master/non-must have combination
1505       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1506                "file '%s' should not exist"
1507                " on non master candidates", file_name)
1508
1509   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1510     """Verifies and the node DRBD status.
1511
1512     @type ninfo: L{objects.Node}
1513     @param ninfo: the node to check
1514     @param nresult: the remote results for the node
1515     @param instanceinfo: the dict of instances
1516     @param drbd_map: the DRBD map as returned by
1517         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1518
1519     """
1520     node = ninfo.name
1521     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1522
1523     # compute the DRBD minors
1524     node_drbd = {}
1525     for minor, instance in drbd_map[node].items():
1526       test = instance not in instanceinfo
1527       _ErrorIf(test, self.ECLUSTERCFG, None,
1528                "ghost instance '%s' in temporary DRBD map", instance)
1529         # ghost instance should not be running, but otherwise we
1530         # don't give double warnings (both ghost instance and
1531         # unallocated minor in use)
1532       if test:
1533         node_drbd[minor] = (instance, False)
1534       else:
1535         instance = instanceinfo[instance]
1536         node_drbd[minor] = (instance.name, instance.admin_up)
1537
1538     # and now check them
1539     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1540     test = not isinstance(used_minors, (tuple, list))
1541     _ErrorIf(test, self.ENODEDRBD, node,
1542              "cannot parse drbd status file: %s", str(used_minors))
1543     if test:
1544       # we cannot check drbd status
1545       return
1546
1547     for minor, (iname, must_exist) in node_drbd.items():
1548       test = minor not in used_minors and must_exist
1549       _ErrorIf(test, self.ENODEDRBD, node,
1550                "drbd minor %d of instance %s is not active", minor, iname)
1551     for minor in used_minors:
1552       test = minor not in node_drbd
1553       _ErrorIf(test, self.ENODEDRBD, node,
1554                "unallocated drbd minor %d is in use", minor)
1555
1556   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1557     """Verifies and updates the node volume data.
1558
1559     This function will update a L{NodeImage}'s internal structures
1560     with data from the remote call.
1561
1562     @type ninfo: L{objects.Node}
1563     @param ninfo: the node to check
1564     @param nresult: the remote results for the node
1565     @param nimg: the node image object
1566     @param vg_name: the configured VG name
1567
1568     """
1569     node = ninfo.name
1570     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1571
1572     nimg.lvm_fail = True
1573     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1574     if vg_name is None:
1575       pass
1576     elif isinstance(lvdata, basestring):
1577       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1578                utils.SafeEncode(lvdata))
1579     elif not isinstance(lvdata, dict):
1580       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1581     else:
1582       nimg.volumes = lvdata
1583       nimg.lvm_fail = False
1584
1585   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1586     """Verifies and updates the node instance list.
1587
1588     If the listing was successful, then updates this node's instance
1589     list. Otherwise, it marks the RPC call as failed for the instance
1590     list key.
1591
1592     @type ninfo: L{objects.Node}
1593     @param ninfo: the node to check
1594     @param nresult: the remote results for the node
1595     @param nimg: the node image object
1596
1597     """
1598     idata = nresult.get(constants.NV_INSTANCELIST, None)
1599     test = not isinstance(idata, list)
1600     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1601                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1602     if test:
1603       nimg.hyp_fail = True
1604     else:
1605       nimg.instances = idata
1606
1607   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1608     """Verifies and computes a node information map
1609
1610     @type ninfo: L{objects.Node}
1611     @param ninfo: the node to check
1612     @param nresult: the remote results for the node
1613     @param nimg: the node image object
1614     @param vg_name: the configured VG name
1615
1616     """
1617     node = ninfo.name
1618     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1619
1620     # try to read free memory (from the hypervisor)
1621     hv_info = nresult.get(constants.NV_HVINFO, None)
1622     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1623     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1624     if not test:
1625       try:
1626         nimg.mfree = int(hv_info["memory_free"])
1627       except (ValueError, TypeError):
1628         _ErrorIf(True, self.ENODERPC, node,
1629                  "node returned invalid nodeinfo, check hypervisor")
1630
1631     # FIXME: devise a free space model for file based instances as well
1632     if vg_name is not None:
1633       test = (constants.NV_VGLIST not in nresult or
1634               vg_name not in nresult[constants.NV_VGLIST])
1635       _ErrorIf(test, self.ENODELVM, node,
1636                "node didn't return data for the volume group '%s'"
1637                " - it is either missing or broken", vg_name)
1638       if not test:
1639         try:
1640           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1641         except (ValueError, TypeError):
1642           _ErrorIf(True, self.ENODERPC, node,
1643                    "node returned invalid LVM info, check LVM status")
1644
1645   def CheckPrereq(self):
1646     """Check prerequisites.
1647
1648     Transform the list of checks we're going to skip into a set and check that
1649     all its members are valid.
1650
1651     """
1652     self.skip_set = frozenset(self.op.skip_checks)
1653     if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1654       raise errors.OpPrereqError("Invalid checks to be skipped specified",
1655                                  errors.ECODE_INVAL)
1656
1657   def BuildHooksEnv(self):
1658     """Build hooks env.
1659
1660     Cluster-Verify hooks just ran in the post phase and their failure makes
1661     the output be logged in the verify output and the verification to fail.
1662
1663     """
1664     all_nodes = self.cfg.GetNodeList()
1665     env = {
1666       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1667       }
1668     for node in self.cfg.GetAllNodesInfo().values():
1669       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1670
1671     return env, [], all_nodes
1672
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    The method first checks the global configuration and the cluster
    certificates, then builds the expected per-node state (a
    C{NodeImage} per node) from the configuration, gathers the runtime
    data of all nodes through a single C{node_verify} RPC call and
    finally compares the two, node by node and instance by instance.

    @param feedback_fn: function used to send feedback back to the caller
    @rtype: boolean
    @return: C{not self.bad}, i.e. True if no check flagged an error

    """
    # self.bad starts out clean for every run; Exec returns its negation
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    # filled by MapLVsByNode below, later passed to _VerifyOrphanVolumes
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    # fingerprints of the local copies, handed to _VerifyNodeFiles together
    # with each node's answer
    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    # parameters of the node_verify RPC call; offline nodes are left out of
    # the node list and of the network test targets
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      }

    # the LVM/DRBD related data is only requested when a volume group is
    # configured
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          # (referenced by an instance but not part of the node list); it
          # gets an image too so that the lookups below cannot fail
          gnode = self.NodeImage()
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        # sbp: on each secondary node, the instances grouped by the name of
        # their primary node
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      # offline nodes are only counted, none of their data is looked at
      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      # the node type is only used for the verbose message, except that
      # drained nodes are also counted for the final notes
      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        # without an answer there is nothing to verify; the flag is checked
        # again in the instance loop below
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      # record the runtime state reported by the node in its image
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image)
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_image)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    # the N+1 memory check is skipped when listed in the op's skip_checks
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    # purely informational notices, they do not influence the result
    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
1897
1898   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1899     """Analyze the post-hooks' result
1900
1901     This method analyses the hook result, handles it, and sends some
1902     nicely-formatted feedback back to the user.
1903
1904     @param phase: one of L{constants.HOOKS_PHASE_POST} or
1905         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1906     @param hooks_results: the results of the multi-node hooks rpc call
1907     @param feedback_fn: function used send feedback back to the caller
1908     @param lu_result: previous Exec result
1909     @return: the new Exec result, based on the previous result
1910         and hook results
1911
1912     """
1913     # We only really run POST phase hooks, and are only interested in
1914     # their results
1915     if phase == constants.HOOKS_PHASE_POST:
1916       # Used to change hooks' output to proper indentation
1917       indent_re = re.compile('^', re.M)
1918       feedback_fn("* Hooks Results")
1919       assert hooks_results, "invalid result from hooks"
1920
1921       for node_name in hooks_results:
1922         res = hooks_results[node_name]
1923         msg = res.fail_msg
1924         test = msg and not res.offline
1925         self._ErrorIf(test, self.ENODEHOOKS, node_name,
1926                       "Communication failure in hooks execution: %s", msg)
1927         if res.offline or msg:
1928           # No need to investigate payload if node is offline or gave an error.
1929           # override manually lu_result here as _ErrorIf only
1930           # overrides self.bad
1931           lu_result = 1
1932           continue
1933         for script, hkr, output in res.payload:
1934           test = hkr == constants.HKR_FAIL
1935           self._ErrorIf(test, self.ENODEHOOKS, node_name,
1936                         "Script %s failed, output:", script)
1937           if test:
1938             output = indent_re.sub('      ', output)
1939             feedback_fn("%s" % output)
1940             lu_result = 0
1941
1942       return lu_result
1943
1944
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks of all nodes and all instances.

    The share flag is set to 1 for every lock level.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict((level, 1) for level in locking.LEVELS)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances which are marked up and use a network-mirrored disk
    template are examined.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = []
    res_missing = {}
    result = (res_nodes, res_instances, res_missing)

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    # maps (node, volume) to the instance object owning that volume
    expected_lvs = {}
    for inst in instances:
      if not inst.admin_up:
        continue
      if inst.disk_template not in constants.DTS_NET_MIRROR:
        continue
      lvs_by_node = {}
      inst.MapLVsByNode(lvs_by_node)
      # transform {node: [vol,],} to {(node, vol): instance}
      for node, vol_list in lvs_by_node.items():
        for vol in vol_list:
          expected_lvs[(node, vol)] = inst

    if not expected_lvs:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      for lv_name, (_, _, lv_online) in node_res.payload.items():
        # every volume the node reports is removed from the expected set
        inst = expected_lvs.pop((node, lv_name), None)
        if lv_online or inst is None:
          continue
        if inst.name not in res_instances:
          res_instances.append(inst.name)

    # any leftover items in expected_lvs are missing LVs, let's arrange the
    # data better
    for node_vol, inst in expected_lvs.items():
      res_missing.setdefault(inst.name, []).append(node_vol)

    return result
2026
2027
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance names and declare the needed locks.

    With an explicit instance list only those instances are locked and
    their node locks are computed later, in L{DeclareLocks}; with an
    empty list all nodes and all instances are locked.

    @raise errors.OpPrereqError: if the 'instances' parameter is not a list

    """
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = [_ExpandInstanceName(self.cfg, name)
                           for name in self.op.instances]
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def DeclareLocks(self, level):
    """Compute the node locks when an explicit instance list was given.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    It sets C{self.wanted_instances} to the configuration objects of the
    instances to work on.

    """
    if self.wanted_names is None:
      # no explicit list was given: work on every instance we have locked
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: whether the size of any (recursively visited) child was changed

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    The disks of the wanted instances are grouped by primary node, each
    node is asked for the actual sizes, and the recorded sizes which
    differ are corrected in the configuration.

    @param feedback_fn: function used to send feedback back to the caller
    @rtype: list
    @return: list of (instance name, disk index, new size) tuples, one for
        every correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      node_disks = per_node_disks.setdefault(instance.primary_node, [])
      for idx, disk in enumerate(instance.disks):
        node_disks.append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      # work on copies, as SetDiskID is called on the objects we send out
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      # The sizes are read from the payload, the attribute every other RPC
      # result consumer in this module reads after checking fail_msg.
      # NOTE(review): expected to be one entry per queried disk - confirm
      # against the RPC layer.
      sizes = result.payload
      if len(sizes) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, sizes):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # scale the reported value down by 2**20 before comparing it with
        # the recorded size
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
2146
2147
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: a tuple of (environment dict, list holding only the master
        node, list of all nodes)

    """
    hooks_env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    master_node = self.cfg.GetMasterNode()
    return (hooks_env, [master_node], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved; the resulting IP address is kept in C{self.ip}
    and the resolved name replaces C{self.op.name}.

    @raise errors.OpPrereqError: if neither the name nor the IP address
        change, or if the new IP address already answers on the network

    """
    resolved = utils.GetHostInfo(self.op.name)
    target_name = resolved.name
    target_ip = resolved.ip
    self.ip = target_ip

    name_changed = target_name != self.cfg.GetClusterName()
    ip_changed = target_ip != self.cfg.GetMasterIP()

    if not (name_changed or ip_changed):
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if ip_changed and utils.TcpPing(target_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 target_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = target_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped on the master node, the configuration
    and the known hosts file are updated and distributed, and the
    master role is started again, even if the update failed.

    @param feedback_fn: function used to send feedback back to the caller

    """
    target_name = self.op.name
    target_ip = self.ip

    # shutdown the master IP
    master_node = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master_node, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster_info = self.cfg.GetClusterInfo()
      cluster_info.cluster_name = target_name
      cluster_info.master_ip = target_ip
      self.cfg.Update(cluster_info, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      # the master already has the new file, upload it to the other nodes
      target_nodes = self.cfg.GetNodeList()
      if master_node in target_nodes:
        target_nodes.remove(master_node)
      upload_results = self.rpc.call_upload_file(target_nodes,
                                                 constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in upload_results.iteritems():
        fail_msg = to_result.fail_msg
        if not fail_msg:
          continue
        self.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                             (constants.SSH_KNOWN_HOSTS_FILE, to_node,
                              fail_msg))

    finally:
      start_result = self.rpc.call_node_start_master(master_node, False,
                                                     False)
      start_msg = start_result.fail_msg
      if start_msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s",
                        start_msg)
2230
2231
def _RecursiveCheckIfLVMBased(disk):
  """Tell whether a disk, or any disk below it, is lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: True if the disk itself or any of its (recursive) children
      has the LD_LV dev_type, False otherwise

  """
  # the subtree is searched first, the disk itself is examined last
  for child in disk.children or ():
    if _RecursiveCheckIfLVMBased(child):
      return True
  return disk.dev_type == constants.LD_LV
2246
2247
2248 class LUSetClusterParams(LogicalUnit):
2249   """Change the parameters of the cluster.
2250
2251   """
2252   HPATH = "cluster-modify"
2253   HTYPE = constants.HTYPE_CLUSTER
2254   _OP_REQP = []
2255   REQ_BGL = False
2256
2257   def CheckArguments(self):
2258     """Check parameters
2259
2260     """
2261     for attr in ["candidate_pool_size",
2262                  "uid_pool", "add_uids", "remove_uids"]:
2263       if not hasattr(self.op, attr):
2264         setattr(self.op, attr, None)
2265
2266     if self.op.candidate_pool_size is not None:
2267       try:
2268         self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2269       except (ValueError, TypeError), err:
2270         raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2271                                    str(err), errors.ECODE_INVAL)
2272       if self.op.candidate_pool_size < 1:
2273         raise errors.OpPrereqError("At least one master candidate needed",
2274                                    errors.ECODE_INVAL)
2275
2276     _CheckBooleanOpField(self.op, "maintain_node_health")
2277
2278     if self.op.uid_pool:
2279       uidpool.CheckUidPool(self.op.uid_pool)
2280
2281     if self.op.add_uids:
2282       uidpool.CheckUidPool(self.op.add_uids)
2283
2284     if self.op.remove_uids:
2285       uidpool.CheckUidPool(self.op.remove_uids)
2286
2287   def ExpandNames(self):
2288     # FIXME: in the future maybe other cluster params won't require checking on
2289     # all nodes to be modified.
2290     self.needed_locks = {
2291       locking.LEVEL_NODE: locking.ALL_SET,
2292     }
2293     self.share_locks[locking.LEVEL_NODE] = 1
2294
2295   def BuildHooksEnv(self):
2296     """Build hooks env.
2297
2298     """
2299     env = {
2300       "OP_TARGET": self.cfg.GetClusterName(),
2301       "NEW_VG_NAME": self.op.vg_name,
2302       }
2303     mn = self.cfg.GetMasterNode()
2304     return env, [mn], [mn]
2305
2306   def CheckPrereq(self):
2307     """Check prerequisites.
2308
2309     This checks whether the given params don't conflict and
2310     if the given volume group is valid.
2311
2312     """
2313     if self.op.vg_name is not None and not self.op.vg_name:
2314       instances = self.cfg.GetAllInstancesInfo().values()
2315       for inst in instances:
2316         for disk in inst.disks:
2317           if _RecursiveCheckIfLVMBased(disk):
2318             raise errors.OpPrereqError("Cannot disable lvm storage while"
2319                                        " lvm-based instances exist",
2320                                        errors.ECODE_INVAL)
2321
2322     node_list = self.acquired_locks[locking.LEVEL_NODE]
2323
2324     # if vg_name not None, checks given volume group on all nodes
2325     if self.op.vg_name:
2326       vglist = self.rpc.call_vg_list(node_list)
2327       for node in node_list:
2328         msg = vglist[node].fail_msg
2329         if msg:
2330           # ignoring down node
2331           self.LogWarning("Error while gathering data on node %s"
2332                           " (ignoring node): %s", node, msg)
2333           continue
2334         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2335                                               self.op.vg_name,
2336                                               constants.MIN_VG_SIZE)
2337         if vgstatus:
2338           raise errors.OpPrereqError("Error on node '%s': %s" %
2339                                      (node, vgstatus), errors.ECODE_ENVIRON)
2340
2341     self.cluster = cluster = self.cfg.GetClusterInfo()
2342     # validate params changes
2343     if self.op.beparams:
2344       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2345       self.new_beparams = objects.FillDict(
2346         cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2347
2348     if self.op.nicparams:
2349       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2350       self.new_nicparams = objects.FillDict(
2351         cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2352       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2353       nic_errors = []
2354
2355       # check all instances for consistency
2356       for instance in self.cfg.GetAllInstancesInfo().values():
2357         for nic_idx, nic in enumerate(instance.nics):
2358           params_copy = copy.deepcopy(nic.nicparams)
2359           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2360
2361           # check parameter syntax
2362           try:
2363             objects.NIC.CheckParameterSyntax(params_filled)
2364           except errors.ConfigurationError, err:
2365             nic_errors.append("Instance %s, nic/%d: %s" %
2366                               (instance.name, nic_idx, err))
2367
2368           # if we're moving instances to routed, check that they have an ip
2369           target_mode = params_filled[constants.NIC_MODE]
2370           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2371             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2372                               (instance.name, nic_idx))
2373       if nic_errors:
2374         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2375                                    "\n".join(nic_errors))
2376
2377     # hypervisor list/parameters
2378     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2379     if self.op.hvparams:
2380       if not isinstance(self.op.hvparams, dict):
2381         raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2382                                    errors.ECODE_INVAL)
2383       for hv_name, hv_dict in self.op.hvparams.items():
2384         if hv_name not in self.new_hvparams:
2385           self.new_hvparams[hv_name] = hv_dict
2386         else:
2387           self.new_hvparams[hv_name].update(hv_dict)
2388
2389     # os hypervisor parameters
2390     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2391     if self.op.os_hvp:
2392       if not isinstance(self.op.os_hvp, dict):
2393         raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2394                                    errors.ECODE_INVAL)
2395       for os_name, hvs in self.op.os_hvp.items():
2396         if not isinstance(hvs, dict):
2397           raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2398                                       " input"), errors.ECODE_INVAL)
2399         if os_name not in self.new_os_hvp:
2400           self.new_os_hvp[os_name] = hvs
2401         else:
2402           for hv_name, hv_dict in hvs.items():
2403             if hv_name not in self.new_os_hvp[os_name]:
2404               self.new_os_hvp[os_name][hv_name] = hv_dict
2405             else:
2406               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2407
2408     # changes to the hypervisor list
2409     if self.op.enabled_hypervisors is not None:
2410       self.hv_list = self.op.enabled_hypervisors
2411       if not self.hv_list:
2412         raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2413                                    " least one member",
2414                                    errors.ECODE_INVAL)
2415       invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2416       if invalid_hvs:
2417         raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2418                                    " entries: %s" %
2419                                    utils.CommaJoin(invalid_hvs),
2420                                    errors.ECODE_INVAL)
2421       for hv in self.hv_list:
2422         # if the hypervisor doesn't already exist in the cluster
2423         # hvparams, we initialize it to empty, and then (in both
2424         # cases) we make sure to fill the defaults, as we might not
2425         # have a complete defaults list if the hypervisor wasn't
2426         # enabled before
2427         if hv not in new_hvp:
2428           new_hvp[hv] = {}
2429         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2430         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2431     else:
2432       self.hv_list = cluster.enabled_hypervisors
2433
2434     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2435       # either the enabled list has changed, or the parameters have, validate
2436       for hv_name, hv_params in self.new_hvparams.items():
2437         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2438             (self.op.enabled_hypervisors and
2439              hv_name in self.op.enabled_hypervisors)):
2440           # either this is a new hypervisor, or its parameters have changed
2441           hv_class = hypervisor.GetHypervisor(hv_name)
2442           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2443           hv_class.CheckParameterSyntax(hv_params)
2444           _CheckHVParams(self, node_list, hv_name, hv_params)
2445
2446     if self.op.os_hvp:
2447       # no need to check any newly-enabled hypervisors, since the
2448       # defaults have already been checked in the above code-block
2449       for os_name, os_hvp in self.new_os_hvp.items():
2450         for hv_name, hv_params in os_hvp.items():
2451           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2452           # we need to fill in the new os_hvp on top of the actual hv_p
2453           cluster_defaults = self.new_hvparams.get(hv_name, {})
2454           new_osp = objects.FillDict(cluster_defaults, hv_params)
2455           hv_class = hypervisor.GetHypervisor(hv_name)
2456           hv_class.CheckParameterSyntax(new_osp)
2457           _CheckHVParams(self, node_list, hv_name, new_osp)
2458
2459
2460   def Exec(self, feedback_fn):
2461     """Change the parameters of the cluster.
2462
2463     """
2464     if self.op.vg_name is not None:
2465       new_volume = self.op.vg_name
2466       if not new_volume:
2467         new_volume = None
2468       if new_volume != self.cfg.GetVGName():
2469         self.cfg.SetVGName(new_volume)
2470       else:
2471         feedback_fn("Cluster LVM configuration already in desired"
2472                     " state, not changing")
2473     if self.op.hvparams:
2474       self.cluster.hvparams = self.new_hvparams
2475     if self.op.os_hvp:
2476       self.cluster.os_hvp = self.new_os_hvp
2477     if self.op.enabled_hypervisors is not None:
2478       self.cluster.hvparams = self.new_hvparams
2479       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2480     if self.op.beparams:
2481       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2482     if self.op.nicparams:
2483       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2484
2485     if self.op.candidate_pool_size is not None:
2486       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2487       # we need to update the pool size here, otherwise the save will fail
2488       _AdjustCandidatePool(self, [])
2489
2490     if self.op.maintain_node_health is not None:
2491       self.cluster.maintain_node_health = self.op.maintain_node_health
2492
2493     if self.op.add_uids is not None:
2494       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2495
2496     if self.op.remove_uids is not None:
2497       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2498
2499     if self.op.uid_pool is not None:
2500       self.cluster.uid_pool = self.op.uid_pool
2501
2502     self.cfg.Update(self.cluster, feedback_fn)
2503
2504
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Copy the ancillary cluster files to the other nodes.

  ConfigWriter already distributes the config and ssconf files; this
  function uploads the remaining files belonging to the cluster
  configuration, including the ones reported by the enabled hypervisors.
  Files not existing locally are skipped and upload failures are only
  logged as warnings.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Target nodes: the online nodes plus the extra ones, minus the master
  master = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  targets = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    targets.extend(additional_nodes)
  try:
    targets.remove(master.name)
  except ValueError:
    # the master was not among the targets, nothing to drop
    pass

  # 2. Files to copy: a fixed set plus whatever the hypervisors need
  files = set([constants.ETC_HOSTS,
               constants.SSH_KNOWN_HOSTS_FILE,
               constants.RAPI_CERT_FILE,
               constants.RAPI_USERS_FILE,
               constants.CONFD_HMAC_KEY,
              ])

  cluster = lu.cfg.GetClusterInfo()
  for hv_name in cluster.enabled_hypervisors:
    files.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # 3. Upload every file which exists locally
  for path in files:
    if not os.path.exists(path):
      continue
    upload = lu.rpc.call_upload_file(targets, path)
    for node_name, node_result in upload.items():
      err = node_result.fail_msg
      if err:
        lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                           (path, node_name, err))
2547
2548
class LURedistributeConfig(NoHooksLU):
  """Logical unit pushing the cluster configuration out again.

  It re-saves the cluster object through the configuration and then
  re-copies the ancillary files.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks of all nodes, in shared mode.

    """
    self.share_locks[locking.LEVEL_NODE] = 1
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to verify for this LU.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: callable passed on to the configuration update

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
2575
2576
def _WaitForSync(lu, instance, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  The mirror status of all disks is queried on the instance's primary
  node until no disk reports a sync in progress, or only once if
  C{oneshot} is set.

  @param lu: the calling logical unit
  @param instance: the instance whose disks are checked
  @param oneshot: if true, don't wait for the sync to finish, only report
      the current state
  @rtype: boolean
  @return: False if in the last poll a disk was degraded without a resync
      being in progress, True otherwise (also for diskless instances)
  @raise errors.RemoteError: if the status query failed ten times in a row

  """
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    # a successful query resets the count of consecutive failures
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, instance.disks[i].iv_name)
        continue

      # degraded only counts if no resync is running for the disk
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
          # keep the longest estimate over all disks; a plain assignment
          # would make the sleep below depend only on the last syncing disk
          max_time = max(max_time, mstat.estimated_time)
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, mstat.sync_percent,
                         rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    # poll again after the longest estimate, but at least once a minute
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
2648
2649
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Verify that a disk and its children are not degraded on a node.

  With C{ldisk} set, the device itself is judged by its C{ldisk_status}
  (which must be C{constants.LDS_OKAY}) instead of by its C{is_degraded}
  flag. The children are always checked without C{ldisk}, and only as
  long as no problem has been found so far.

  @param lu: the calling logical unit
  @param dev: the disk to check
  @param node: the node on which to check
  @param on_primary: if false, the device itself is only queried when
      C{dev.AssembleOnSecondary()} is true
  @param ldisk: whether to test the local storage status of the device
  @rtype: boolean
  @return: True if no problem was found

  """
  lu.cfg.SetDiskID(dev, node)

  consistent = True

  if on_primary or dev.AssembleOnSecondary():
    found = lu.rpc.call_blockdev_find(node, dev)
    err = found.fail_msg
    if err:
      lu.LogWarning("Can't find disk on node %s: %s", node, err)
      consistent = False
    elif not found.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      consistent = False
    elif ldisk:
      consistent = found.payload.ldisk_status == constants.LDS_OKAY
    else:
      consistent = not found.payload.is_degraded

  for child in dev.children or ():
    if not consistent:
      # a problem was already found, the remaining devices are not queried
      break
    consistent = _CheckDiskConsistency(lu, child, node, on_primary)

  return consistent
2682
2683
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    """Validate the query; no locks are requested.

    @raise errors.OpPrereqError: if a selection of OS names was given

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and lists of (path, status, diagnose, variants)
        tuples as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", []),
                                     (/srv/..., False, "invalid api", [])],
                           "node2": [(/srv/..., True, "", [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # every good node gets an (initially empty) list for this os, so
          # that nodes lacking the os show up with an empty list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    An OS is reported as valid only if every node which answered the
    query has it and the first entry found on each node is valid; the
    variants are the ones common to all nodes (None for an invalid OS).

    @rtype: list
    @return: one row per OS, holding the values of the requested
        output fields in the requested order

    """
    valid_nodes = list(self.cfg.GetOnlineNodeList())
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        valid = True
        variants = None
        for osl in os_data.values():
          # "and" returns one of its operands, so without bool() an empty
          # per-node list (or the raw status) would leak into the result
          valid = bool(valid and osl and osl[0][1])
          if not valid:
            variants = None
            break
          if calc_variants:
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = dict(os_data)
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
2797
2798
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The node being removed is left out of both hook node lists: a failed
    node would otherwise be impossible to remove.

    """
    target = self.op.node_name
    env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }
    remaining = self.cfg.GetNodeList()
    if target in remaining:
      remaining.remove(target)
    else:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", target)
    return env, remaining, remaining

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it's not the master
     - no instance uses it, be it as primary or as secondary node

    Any errors are signaled by raising errors.OpPrereqError.

    """
    expanded = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = expanded
    node = self.cfg.GetNodeInfo(expanded)
    assert node is not None

    instance_names = self.cfg.GetInstanceList()

    if node.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for iname in instance_names:
      if node.name in self.cfg.GetInstanceInfo(iname).all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % iname,
                                   errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    Problems while running the post hooks or while the node leaves the
    cluster are only logged as warnings.

    """
    name = self.node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 name)

    # read before the node is dropped from the configuration
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[name])
    self.context.RemoveNode(name)

    # Run post hooks on the node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % name)

    leave_result = self.rpc.call_node_leave_cluster(name, modify_ssh_setup)
    err = leave_result.fail_msg
    if err:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", err)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.RemoveHostFromEtcHosts(name)
      _RedistributeAncillaryFiles(self)
2891
2892
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  # fields read directly from the attribute of the same name
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  # fields taken from the data returned by the nodes
  _FIELDS_DYNAMIC = utils.FieldSet("dtotal", "dfree",
                                   "mtotal", "mnode", "mfree",
                                   "bootid",
                                   "ctotal", "cnodes", "csockets")

  _FIELDS_STATIC = utils.FieldSet(*(["pinst_cnt", "sinst_cnt",
                                     "pinst_list", "sinst_list",
                                     "pip", "sip", "tags",
                                     "master",
                                     "role"] + _SIMPLE_FIELDS))

  def ExpandNames(self):
    """Validate the fields and compute which nodes and locks are needed.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    self.wanted = locking.ALL_SET
    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)

    # the nodes must be queried if any non-static field was selected
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def _CollectLiveData(self, nodenames):
    """Query the nodes for their runtime data.

    @param nodenames: names of the nodes to query
    @rtype: dict
    @return: for every node name the dict of dynamic field values, which
        is empty if the node returned an error or no data

    """
    # (output field, key in the node's answer), all converted via int
    int_fields = [
      ("mtotal", 'memory_total'),
      ("mnode", 'memory_dom0'),
      ("mfree", 'memory_free'),
      ("dtotal", 'vg_size'),
      ("dfree", 'vg_free'),
      ("ctotal", 'cpu_total'),
      ("cnodes", 'cpu_nodes'),
      ("csockets", 'cpu_sockets'),
      ]
    replies = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                      self.cfg.GetHypervisorType())
    live_data = {}
    for name in nodenames:
      reply = replies[name]
      if reply.fail_msg or not reply.payload:
        live_data[name] = {}
        continue
      info = reply.payload
      values = dict([(field, utils.TryConvert(int, info.get(key, None)))
                     for (field, key) in int_fields])
      values["bootid"] = info.get('bootid', None)
      live_data[name] = values
    return live_data

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    @rtype: list
    @return: one row per node (sorted by name), holding the values of the
        requested output fields in the requested order

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted == locking.ALL_SET:
      nodenames = all_info.keys()
    else:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = self._CollectLiveData(nodenames)
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields.intersection(self.op.output_fields):
      # the instances are only walked if an instance field was selected
      for inst in self.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    def _Role(node):
      """Return the one-letter role of a node."""
      if node.name == master_node:
        return "M"
      if node.master_candidate:
        return "C"
      if node.drained:
        return "D"
      if node.offline:
        return "O"
      return "R"

    # static fields which need some computation, keyed by field name
    computed = {
      "pinst_list": lambda node: list(node_to_primary[node.name]),
      "sinst_list": lambda node: list(node_to_secondary[node.name]),
      "pinst_cnt": lambda node: len(node_to_primary[node.name]),
      "sinst_cnt": lambda node: len(node_to_secondary[node.name]),
      "pip": lambda node: node.primary_ip,
      "sip": lambda node: node.secondary_ip,
      "tags": lambda node: list(node.GetTags()),
      "master": lambda node: node.name == master_node,
      "role": _Role,
      }

    output = []
    for node in nodelist:
      row = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field in computed:
          val = computed[field](node)
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
3052
3053
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    """Validate the fields and request shared locks on the nodes.

    All nodes are locked if no node was named in the opcode.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    The nodes to query are the ones whose locks were acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  @staticmethod
  def _FindOwner(instances, lv_by_node, node, lv_name):
    """Return the name of the instance owning a volume on a node.

    @param instances: the instance objects to search, in order
    @param lv_by_node: map from instance to its per-node volume names
    @param node: the node the volume lives on
    @param lv_name: the name of the volume
    @return: the name of the first matching instance, or '-' if none

    """
    for inst in instances:
      node_lvs = lv_by_node[inst]
      if node in node_lvs and lv_name in node_lvs[node]:
        return inst.name
    return '-'

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    Offline nodes are skipped silently, nodes failing the query with a
    warning.

    @rtype: list
    @return: one row per volume, holding the requested output fields
        converted to strings; within a node the volumes are sorted by
        their device

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    instances = [self.cfg.GetInstanceInfo(iname)
                 for iname in self.cfg.GetInstanceList()]
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in instances])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      err = nresult.fail_msg
      if err:
        self.LogWarning("Can't compute volume data on node %s: %s", node, err)
        continue

      for vol in sorted(nresult.payload, key=lambda vol: vol['dev']):
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            val = self._FindOwner(instances, lv_by_node, node, vol['name'])
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        output.append(row)

    return output
3138
3139
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def CheckArguments(self):
    """Validate the storage type and the requested output fields.

    """
    _CheckStorageType(self.op.storage_type)

    storage_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=storage_fields,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Request shared locks on the named nodes, or on all of them.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      wanted = locking.ALL_SET
    else:
      wanted = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    Defaults the optional storage unit name to None and records the
    nodes whose locks were acquired.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def _BuildRow(self, node, row, field_idx):
    """Build one output row from the data of a storage unit.

    @param node: the node the storage unit was reported by
    @param row: the values returned by the node for the unit
    @param field_idx: map from field name to its index in C{row}
    @rtype: list
    @return: the values of the requested output fields, in order

    """
    out = []
    for field in self.op.output_fields:
      if field == constants.SF_NODE:
        out.append(node)
      elif field == constants.SF_TYPE:
        out.append(self.op.storage_type)
      elif field in field_idx:
        out.append(row[field_idx[field]])
      else:
        raise errors.ParameterError(field)
    return out

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes are skipped silently, nodes failing the query with a
    warning.

    @rtype: list
    @return: one row per storage unit, ordered by node and then by unit
        name

    """
    requested = self.op.output_fields

    # Always get name to sort by
    if constants.SF_NAME in requested:
      candidates = requested[:]
    else:
      candidates = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    local_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [fname for fname in candidates if fname not in local_only]

    field_idx = {}
    for (idx, fname) in enumerate(fields):
      field_idx[fname] = idx
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      err = nresult.fail_msg
      if err:
        self.LogWarning("Can't get storage data from node %s: %s", node, err)
        continue

      # index the units by name, to emit them in sorted order
      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        result.append(self._BuildRow(node, rows[name], field_idx))

    return result
3232
3233
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the storage type.

    """
    # the expanded name must be stored back in the opcode: ExpandNames and
    # Exec read self.op.node_name (the result used to be lost in a
    # mistyped "self.opnode_name" attribute)
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

  def ExpandNames(self):
    """Request the lock of the node holding the storage unit.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the storage type has modifiable fields and that all
    requested changes are among them.

    @raise errors.OpPrereqError: if the storage type can't be modified or
        a field which is not modifiable was given

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Apply the requested changes to the storage unit.

    The changes are sent to the node; the result's C{Raise} is called
    with the failure message to use.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
3281
3282
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  Depending on C{self.op.readd} this either adds a new node or re-adds
  a node which is already present in the configuration.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def CheckArguments(self):
    """Normalizes the node name passed in the opcode.

    """
    # validate/normalize the node name
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    @return: tuple of (environment dict, list of the current nodes, list
        of the current nodes plus the node being added)

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      # CheckPrereq stores both IPs on the opcode
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    As a side effect this sets C{self.op.primary_ip},
    C{self.op.secondary_ip}, C{self.changed_primary_ip},
    C{self.master_candidate} and C{self.new_node}, which are used by
    L{BuildHooksEnv} and L{Exec}.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.GetHostInfo(node_name)

    # from here on the name and primary IP as returned by the resolver
    # are used
    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    # a missing secondary IP defaults to the primary one
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    self.op.secondary_ip = secondary_ip

    # an add requires the node to be unknown, a re-add requires it to
    # be already known
    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      # the node being re-added is only compared against its own old
      # entry: the secondary IP must be unchanged, a changed primary IP
      # is just recorded for Exec
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      # none of the new node's IPs may be used by another node, in
      # either role
      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    # on a re-add the node itself is passed as an exception to the
    # promotion decision
    # NOTE(review): presumably so that its current entry is not counted
    # as an existing master candidate - confirm in _DecideSelfPromotion
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    # a re-add reuses the existing node object, an add creates a new one
    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    The steps are, in order: reset the node flags (re-add only), check
    the protocol version of the node, copy the ssh keys (if the cluster
    modifies the ssh setup), update /etc/hosts (if the cluster modifies
    it), verify the secondary IP, run an ssh/hostname verification from
    the master and finally register the node and redistribute the
    ancillary files.

    @param feedback_fn: function called with a message for each failed
        ssh/hostname verification
    @raise errors.OpExecError: on protocol version mismatch, if the node
        doesn't have the secondary IP, or if the ssh/hostname
        verification fails

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      # the order of the files matters: the contents are passed
      # positionally to call_node_add below
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.AddHostToEtcHosts(new_node.name)

    # for dual-homed nodes, ask the node itself whether it owns the
    # secondary IP
    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    # run the node list verification from the master only, with the new
    # node as the single target
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      # a non-empty payload is treated as a mapping of failed node to
      # error message
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        # a failed demotion is only reported as a warning
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      # the node is not yet in the configuration, so it has to be named
      # explicitly as a target for the file distribution
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())
3512
3513
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  The parameters handled here are the C{master_candidate}, C{offline}
  and C{drained} flags of the node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Validates the requested flag changes.

    At least one of the three flags must be passed and at most one of
    them may be set to True. Besides the validation, this computes the
    helper attributes C{offline_or_drain}, C{deoffline_or_drain},
    C{might_demote} and C{lock_all} used by the other methods.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    _CheckBooleanOpField(self.op, 'auto_promote')
    # the flags are tri-state: None means "not passed", which is why
    # they are compared explicitly against True/False below
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    # Whether one of the offline/drained flags is being cleared
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    # Offlining or draining a node also demotes it (see Exec)
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    # all nodes are locked only when a demotion is possible and the
    # user asked for auto-promotion
    self.lock_all = self.op.auto_promote and self.might_demote


  def ExpandNames(self):
    """Declares the node locks: all nodes or just the target node.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node and on the node being modified.

    """
    # the flags are exported as strings, so a flag which was not passed
    # shows up as "None"
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that:
     - the flags of the master node are not being changed
     - a demotion done without locking all nodes leaves enough master
       candidates
     - an offline or drained node is not promoted to master candidate,
       unless the offending flag is cleared at the same time

    It also decides whether a node which is being de-offlined or
    undrained should be promoted to master candidate.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",
                                   errors.ECODE_INVAL)


    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    # promotion is refused while the node is offline or drained, unless
    # that flag is explicitly being set to False in the same operation
    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      # note that this overwrites the value passed in the opcode
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    @return: list of (parameter name, description of the change) pairs,
        one for each change made to the node, including the implicit
        ones (e.g. the demotion caused by offlining the node)

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        # an offline node is neither master candidate nor drained; unlike
        # in the drain case below, no demotion RPC is sent to the node
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        # a failure of the node to demote itself is only a warning
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        # a drained node is neither master candidate nor offline
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result
3675
3676
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and protects the master node.

    Powercycling the master node is only allowed if the force parameter
    was given.

    """
    target = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = target

    targets_master = (target == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    No locks are acquired: this is a last-resort operation and it must
    not wait for other jobs.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Schedules the reboot of the node.

    @return: the payload of the powercycle RPC call

    """
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name,
                                               self.cfg.GetHypervisorType())
    rpc_result.Raise("Failed to schedule the reboot")

    return rpc_result.payload
3716
3717
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """This LU needs no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """

  def Exec(self, feedback_fn):
    """Return cluster config.

    @rtype: dict
    @return: the software/protocol versions, the architecture of the
        machine running this code and the cluster-level settings; the
        hypervisor parameters (global and per-OS) are restricted to the
        enabled hypervisors

    """
    cluster = self.cfg.GetClusterInfo()

    # Per-OS hypervisor parameters, keeping only the enabled hypervisors
    os_hvp = {}
    for (os_name, hv_dict) in cluster.os_hvp.items():
      os_hvp[os_name] = dict([(hv_name, hv_params)
                              for (hv_name, hv_params) in hv_dict.items()
                              if hv_name in cluster.enabled_hypervisors])

    # Global hypervisor parameters, again only for the enabled ones
    hvparams = {}
    for hv_name in cluster.enabled_hypervisors:
      hvparams[hv_name] = cluster.hvparams[hv_name]

    arch = (platform.architecture()[0], platform.machine())

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": arch,
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      }
3777
3778
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    """Declares that no locks are needed and validates the output fields.

    """
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """

  def _GetFieldValue(self, field):
    """Computes the value of a single output field.

    @param field: the name of the requested field
    @return: the value of the field
    @raise errors.ParameterError: if the field is not known

    """
    if field == "cluster_name":
      return self.cfg.GetClusterName()
    if field == "master_node":
      return self.cfg.GetMasterNode()
    if field == "drain_flag":
      return os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
    if field == "watcher_pause":
      return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)

    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Collects the requested configuration values.

    @rtype: list
    @return: the values of the selected output fields, in the order in
        which the fields were requested

    """
    return [self._GetFieldValue(field) for field in self.op.output_fields]
3820
3821
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fills in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node is online; it also defaults the C{ignore_size} opcode
    attribute to False if it was not passed.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (disks_ok, disks_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if disks_ok:
      return disks_info

    raise errors.OpExecError("Cannot activate block devices")
3862
3863
3864 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3865                            ignore_size=False):
3866   """Prepare the block devices for an instance.
3867
3868   This sets up the block devices on all nodes.
3869
3870   @type lu: L{LogicalUnit}
3871   @param lu: the logical unit on whose behalf we execute
3872   @type instance: L{objects.Instance}
3873   @param instance: the instance for whose disks we assemble
3874   @type ignore_secondaries: boolean
3875   @param ignore_secondaries: if true, errors on secondary nodes
3876       won't result in an error return from the function
3877   @type ignore_size: boolean
3878   @param ignore_size: if true, the current known size of the disk
3879       will not be used during the disk activation, useful for cases
3880       when the size is wrong
3881   @return: False if the operation failed, otherwise a list of
3882       (host, instance_visible_name, node_visible_name)
3883       with the mapping from node devices to instance devices
3884
3885   """
3886   device_info = []
3887   disks_ok = True
3888   iname = instance.name
3889   # With the two passes mechanism we try to reduce the window of
3890   # opportunity for the race condition of switching DRBD to primary
3891   # before handshaking occured, but we do not eliminate it
3892
3893   # The proper fix would be to wait (with some limits) until the
3894   # connection has been made and drbd transitions from WFConnection
3895   # into any other network-connected state (Connected, SyncTarget,
3896   # SyncSource, etc.)
3897
3898   # 1st pass, assemble on all nodes in secondary mode
3899   for inst_disk in instance.disks:
3900     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3901       if ignore_size:
3902         node_disk = node_disk.Copy()
3903         node_disk.UnsetSize()
3904       lu.cfg.SetDiskID(node_disk, node)
3905       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3906       msg = result.fail_msg
3907       if msg:
3908         lu.proc.LogWarning("Could not prepare block device %s on node %s"
3909                            " (is_primary=False, pass=1): %s",
3910                            inst_disk.iv_name, node, msg)
3911         if not ignore_secondaries:
3912           disks_ok = False
3913
3914   # FIXME: race condition on drbd migration to primary
3915
3916   # 2nd pass, do only the primary node
3917   for inst_disk in instance.disks:
3918     dev_path = None
3919
3920     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3921       if node != instance.primary_node:
3922         continue
3923       if ignore_size:
3924         node_disk = node_disk.Copy()
3925         node_disk.UnsetSize()
3926       lu.cfg.SetDiskID(node_disk, node)
3927       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3928       msg = result.fail_msg
3929       if msg:
3930         lu.proc.LogWarning("Could not prepare block device %s on node %s"
3931                            " (is_primary=True, pass=2): %s",
3932                            inst_disk.iv_name, node, msg)
3933         disks_ok = False
3934       else:
3935         dev_path = result.payload
3936
3937     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3938
3939   # leave the disks configured for the primary node
3940   # this is a workaround that would be fixed better by
3941   # improving the logical/physical id handling
3942   for disk in instance.disks:
3943     lu.cfg.SetDiskID(disk, instance.primary_node)
3944
3945   return disks_ok, device_info
3946
3947
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  If the disks can't be assembled they are shut down again and an error
  is raised.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed as C{ignore_secondaries} to the disk assembly;
      if it is false (but not None), a hint about '--force' is logged on
      failure
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (disks_ok, _) = _AssembleInstanceDisks(lu, instance,
                                         ignore_secondaries=force)
  if disks_ok:
    return

  # roll back whatever was assembled before reporting the failure
  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
3961
3962
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fills in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    _SafeShutdownInstanceDisks(self, self.instance)
3995
3996
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be shut down

  """
  # NOTE(review): _CheckInstanceDown is defined outside this view;
  # presumably it raises if the instance is not down - confirm
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  # the success indicator returned by _ShutdownInstanceDisks is discarded
  _ShutdownInstanceDisks(lu, instance)
4006
4007
4008 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
4009   """Shutdown block devices of an instance.
4010
4011   This does the shutdown on all nodes of the instance.
4012
4013   If the ignore_primary is false, errors on the primary node are
4014   ignored.
4015
4016   """
4017   all_result = True
4018   for disk in instance.disks:
4019     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4020       lu.cfg.SetDiskID(top_disk, node)
4021       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4022       msg = result.fail_msg
4023       if msg:
4024         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4025                       disk.iv_name, node, msg)
4026         if not ignore_primary or node != instance.primary_node:
4027           all_result = False
4028   return all_result
4029
4030
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  The free memory of the node is queried via RPC and compared with the
  requested amount. If the node has less memory than that, or if the
  information can't be retrieved, an OpPrereqError exception is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  node_result = lu.rpc.call_node_info([node], vg_name, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  free_mem = node_result.payload.get('memory_free', None)
  # anything but an integer means the node couldn't report the value
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)

  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
4066
4067
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  The free space of all the given nodes is queried with a single RPC
  call and each node is then checked in turn. If any node has less
  disk than requested, or if the information can't be retrieved from
  it, an OpPrereqError exception is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  hv_name = lu.cfg.GetHypervisorType()
  all_results = lu.rpc.call_node_info(nodenames, vg_name, hv_name)

  for node in nodenames:
    node_result = all_results[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    vg_free = node_result.payload.get("vg_free", None)
    # anything but an integer means the node couldn't report the value
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)

    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)
4102
4103
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  The opcode may optionally carry C{beparams} and C{hvparams}
  dictionaries; they are validated in L{CheckPrereq} and passed to the
  instance start RPC in L{Exec}.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes)

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates the
    optional backend/hypervisor parameter overrides, and verifies that
    the primary node is online, has the needed bridges and (if the
    instance is not already running) has enough free memory.

    @raise errors.OpPrereqError: if the overrides are not dictionaries
        or any of the checks fails

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    self.beparams = getattr(self.op, "beparams", {})
    if self.beparams:
      if not isinstance(self.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.beparams), ),
                                   errors.ECODE_INVAL)
      # fill the beparams dict
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = self.beparams

    # extra hvparams
    self.hvparams = getattr(self.op, "hvparams", {})
    if self.hvparams:
      if not isinstance(self.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.hvparams), ),
                                   errors.ECODE_INVAL)

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      # the overrides win over the instance/cluster level values
      filled_hvp.update(self.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
      self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if self.beparams:
      # The start RPC receives the beparams overrides, so the memory
      # check below must use the overridden values as well; otherwise a
      # start with a larger temporary memory size would pass this check
      # based on the (smaller) configured value. Work on a copy so that
      # whatever FillBE returned is never modified.
      bep = dict(bep)
      bep.update(self.beparams)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked as up in the configuration first, then its
    disks are started and finally the start RPC is sent to the primary
    node. If the start fails, the disks are shut down again.

    @raise errors.OpExecError: if the instance could not be started

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.fail_msg
    if msg:
      # do not leave the disks active for an instance that is not running
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)
4204
4205
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  Soft and hard reboots are delegated to the primary node with a single
  RPC call; any other (i.e. full) reboot is done as a shutdown followed
  by a disk restart and a fresh instance start.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Reads the optional shutdown timeout from the opcode, falling back
    to the default value.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    known_types = (constants.INSTANCE_REBOOT_SOFT,
                   constants.INSTANCE_REBOOT_HARD,
                   constants.INSTANCE_REBOOT_FULL)
    if self.op.reboot_type not in known_types:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  known_types)
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that its primary
    node is online and that the needed bridges exist.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, inst.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, inst)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    inst = self.instance
    pnode = inst.primary_node
    kind = self.op.reboot_type
    in_place_types = (constants.INSTANCE_REBOOT_SOFT,
                      constants.INSTANCE_REBOOT_HARD)

    if kind not in in_place_types:
      # full reboot: stop the instance, cycle its disks, start it again
      result = self.rpc.call_instance_shutdown(pnode, inst,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, inst)
      _StartInstanceDisks(self, inst, self.op.ignore_secondaries)
      result = self.rpc.call_instance_start(pnode, inst, None, None)
      failure = result.fail_msg
      if failure:
        _ShutdownInstanceDisks(self, inst)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % failure)
    else:
      # soft/hard reboot: handled by the node in one call
      for disk in inst.disks:
        self.cfg.SetDiskID(disk, pnode)
      result = self.rpc.call_instance_reboot(pnode, inst, kind,
                                             self.shutdown_timeout)
      result.Raise("Could not reboot instance")

    self.cfg.MarkInstanceUp(inst.name)
4294
4295
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Reads the optional timeout from the opcode, falling back to the
    default shutdown timeout.

    """
    default = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.timeout = getattr(self.op, "timeout", default)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node is online.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked as down in the configuration before the
    shutdown RPC is sent; an RPC failure is only logged as a warning
    and the instance disks are shut down in any case.

    """
    inst = self.instance
    self.cfg.MarkInstanceDown(inst.name)
    result = self.rpc.call_instance_shutdown(inst.primary_node, inst,
                                             self.timeout)
    failure = result.fail_msg
    if failure:
      self.proc.LogWarning("Could not shutdown instance: %s" % failure)

    _ShutdownInstanceDisks(self, inst)
4351
4352
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    It also rejects diskless instances and, when a new OS was requested,
    verifies the OS against the primary node.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot reinstall")

    # normalise the optional opcode attributes so Exec can rely on them
    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      primary = _ExpandNodeName(self.cfg, inst.primary_node)
      _CheckNodeHasOS(self, primary, self.op.os_type, self.op.force_variant)

    self.instance = inst

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    If a new OS was requested it is stored in the configuration first;
    then the disks are activated, the OS create scripts are run on the
    primary node and the disks are shut down again (also on failure).

    """
    instance = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      instance.os = new_os
      self.cfg.Update(instance, feedback_fn)

    _StartInstanceDisks(self, instance, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      pnode = instance.primary_node
      result = self.rpc.call_instance_os_add(pnode, instance, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (instance.name, instance.primary_node))
    finally:
      _ShutdownInstanceDisks(self, instance)
4422
4423
class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The C{disks} parameter must be a list of non-negative integers.

    """
    disks = self.op.disks
    if not isinstance(disks, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for entry in disks:
      if not isinstance(entry, int) or entry < 0:
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(entry), errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    An empty disk list is expanded to all disks of the instance;
    otherwise every given index must refer to an existing disk.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot recreate disks")

    disk_count = len(inst.disks)
    if self.op.disks:
      for idx in self.op.disks:
        if idx >= disk_count:
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)
    else:
      self.op.disks = range(disk_count)

    self.instance = inst

  def Exec(self, feedback_fn):
    """Recreate the disks.

    Every disk index which was not requested is passed to the disk
    creation helper as "to be skipped".

    """
    requested = self.op.disks
    to_skip = [idx for idx in range(len(self.instance.disks))
               if idx not in requested]

    _CreateDisks(self, self.instance, to_skip=to_skip)
4495
4496
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    that the (resolved) new name is not used by another instance and,
    unless C{ignore_ip} is set, that the new IP does not answer on the
    node daemon port.

    """
    full_name = _ExpandInstanceName(self.cfg, self.op.instance_name)
    self.op.instance_name = full_name
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None
    _CheckNodeOnline(self, inst.primary_node)
    _CheckInstanceDown(self, inst, "cannot rename")
    self.instance = inst

    # new name verification
    host_info = utils.GetHostInfo(self.op.new_name)
    new_name = host_info.name
    self.op.new_name = new_name

    if new_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    check_ip = not getattr(self.op, "ignore_ip", False)
    if check_ip and utils.TcpPing(host_info.ip,
                                  constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                 (host_info.ip, new_name),
                                 errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The instance is renamed in the configuration and its lock is
    replaced; for file-based instances the storage directory is renamed
    on the primary node. Finally the OS rename script is run with the
    disks activated; a failure of the script is only logged as a
    warning.

    """
    instance = self.instance
    previous_name = instance.name

    if instance.disk_template == constants.DT_FILE:
      # remember the directory before the configuration is changed
      old_dir = os.path.dirname(instance.disks[0].logical_id[1])

    self.cfg.RenameInstance(instance.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    lock_manager = self.context.glm
    lock_manager.remove(locking.LEVEL_INSTANCE, previous_name)
    lock_manager.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    instance = self.cfg.GetInstanceInfo(self.op.new_name)
    pnode = instance.primary_node

    if instance.disk_template == constants.DT_FILE:
      new_dir = os.path.dirname(instance.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(pnode, old_dir, new_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (instance.primary_node, old_dir, new_dir))

    _StartInstanceDisks(self, instance, None)
    try:
      result = self.rpc.call_instance_run_rename(pnode, instance,
                                                 previous_name,
                                                 self.op.debug_level)
      failure = result.fail_msg
      if failure:
        warning = ("Could not run OS rename script for instance %s on node %s"
                   " (but the instance has been renamed in Ganeti): %s" %
                   (instance.name, instance.primary_node, failure))
        self.proc.LogWarning(warning)
    finally:
      _ShutdownInstanceDisks(self, instance)
4586
4587
class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Reads the optional shutdown timeout from the opcode, falling back
    to the default value.

    """
    default = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout", default)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # the node locks are computed later, in DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre-hooks node list contains only the master node, while the
    post-hooks list holds all nodes of the instance followed by the
    master node.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    pre_nodes = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes) + pre_nodes
    return env, pre_nodes, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down, its disks are removed and finally it is
    dropped from the configuration. With C{ignore_failures} set, a
    failed shutdown or disk removal only produces a warning; otherwise
    it aborts the operation.

    """
    inst = self.instance
    tolerant = self.op.ignore_failures

    logging.info("Shutting down instance %s on node %s",
                 inst.name, inst.primary_node)
    result = self.rpc.call_instance_shutdown(inst.primary_node, inst,
                                             self.shutdown_timeout)
    failure = result.fail_msg
    if failure:
      if not tolerant:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    logging.info("Removing block devices for instance %s", inst.name)
    if not _RemoveDisks(self, inst):
      if not tolerant:
        raise errors.OpExecError("Can't remove instance's disks")
      feedback_fn("Warning: can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", inst.name)
    self.cfg.RemoveInstance(inst.name)
    # have the instance lock removed as well
    self.remove_locks[locking.LEVEL_INSTANCE] = inst.name
4666
4667
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  Fields declared in C{_FIELDS_STATIC} are answered from the
  configuration; requesting any other (dynamic) field triggers an RPC
  call to the primary nodes of the selected instances.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  # fields which map directly to an attribute of the instance object
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*[
    "name", "os", "pnode", "snodes",
    "admin_state",
    "disk_template", "ip", "mac", "bridge",
    "nic_mode", "nic_link",
    "sda_size", "sdb_size", "vcpus", "tags",
    "network_port", "beparams",
    r"(disk)\.(size)/([0-9]+)",
    r"(disk)\.(sizes)", "disk_usage",
    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
    r"(nic)\.(bridge)/([0-9]+)",
    r"(nic)\.(macs|ips|modes|links|bridges)",
    r"(disk|nic)\.(count)",
    "hvparams",
    ] + _SIMPLE_FIELDS +
    ["hv/%s" % name
     for name in constants.HVS_PARAMETERS
     if name not in constants.HVC_GLOBALS] +
    ["be/%s" % name
     for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")

  @staticmethod
  def _NicBridge(nicp):
    """Returns the link of a bridged NIC, or None for other NIC modes.

    @param nicp: the filled parameters of a single NIC

    """
    if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      return nicp[constants.NIC_LINK]
    return None

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    for level in (locking.LEVEL_INSTANCE, locking.LEVEL_NODE):
      self.share_locks[level] = 1

    if not self.op.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedInstances(self, self.op.names)

    # any requested field which is not static needs live data
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.do_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing needs to be checked for this LU.

    """

  def Exec(self, feedback_fn):
    """Computes the requested fields for the selected instances.

    @return: a list with one entry per instance, each entry being the
        list of values for the requested output fields (in the order
        of C{self.op.output_fields})

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()

    # the names we can actually report on: either what we locked, or
    # everything the configuration currently knows about
    if self.do_locking:
      available = self.acquired_locks[locking.LEVEL_INSTANCE]
    else:
      available = all_info.keys()

    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      instance_names = utils.NiceSort(available)
    else:
      # caller did specify names, so we must keep the ordering
      missing = set(self.wanted).difference(available)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if not self.do_node_query:
      live_data = dict([(name, {}) for name in instance_names])
    else:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        nresult = node_data[name]
        if nresult.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if nresult.fail_msg:
          bad_nodes.append(name)
        elif nresult.payload:
          live_data.update(nresult.payload)
        # else no instance is alive

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      row = []
      hv_filled = cluster.FillHV(instance, skip_globals=True)
      be_filled = cluster.FillBE(instance)
      nic_params = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                     nic.nicparams)
                    for nic in instance.nics]
      pnode = instance.primary_node
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        hv_key = field[len(HVPREFIX):]
        be_key = field[len(BEPREFIX):]
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = pnode
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if pnode in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if pnode in off_nodes:
            val = "ERROR_nodeoffline"
          elif pnode in bad_nodes:
            val = "ERROR_nodedown"
          else:
            # compare the live state with the configured (admin) state
            running = bool(live_data.get(instance.name))
            if running and instance.admin_up:
              val = "running"
            elif running:
              val = "ERROR_up"
            elif instance.admin_up:
              val = "ERROR_down"
            else:
              val = "ADMIN_down"
        elif field == "oper_ram":
          if pnode in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = be_filled[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          val = None
          if instance.nics:
            val = instance.nics[0].ip
        elif field == "nic_mode":
          val = None
          if instance.nics:
            val = nic_params[0][constants.NIC_MODE]
        elif field == "nic_link":
          val = None
          if instance.nics:
            val = nic_params[0][constants.NIC_LINK]
        elif field == "bridge":
          val = None
          if instance.nics:
            val = self._NicBridge(nic_params[0])
        elif field == "mac":
          val = None
          if instance.nics:
            val = instance.nics[0].mac
        elif field in ("sda_size", "sdb_size"):
          # third letter of the field name gives the disk index (a=0, b=1)
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = hv_filled
        elif (field.startswith(HVPREFIX) and
              hv_key in constants.HVS_PARAMETERS and
              hv_key not in constants.HVC_GLOBALS):
          val = hv_filled.get(hv_key, None)
        elif field == "beparams":
          val = be_filled
        elif (field.startswith(BEPREFIX) and
              be_key in constants.BES_PARAMETERS):
          val = be_filled.get(be_key, None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          kind = st_groups[0]
          what = st_groups[1]
          if st_groups and kind == "disk":
            if what == "count":
              val = len(instance.disks)
            elif what == "sizes":
              val = [disk.size for disk in instance.disks]
            elif what == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif kind == "nic":
            if what == "count":
              val = len(instance.nics)
            elif what == "macs":
              val = [nic.mac for nic in instance.nics]
            elif what == "ips":
              val = [nic.ip for nic in instance.nics]
            elif what == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in nic_params]
            elif what == "links":
              val = [nicp[constants.NIC_LINK] for nicp in nic_params]
            elif what == "bridges":
              val = [self._NicBridge(nicp) for nicp in nic_params]
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              elif what == "mac":
                val = instance.nics[nic_idx].mac
              elif what == "ip":
                val = instance.nics[nic_idx].ip
              elif what == "mode":
                val = nic_params[nic_idx][constants.NIC_MODE]
              elif what == "link":
                val = nic_params[nic_idx][constants.NIC_LINK]
              elif what == "bridge":
                val = self._NicBridge(nic_params[nic_idx])
              else:
                assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        row.append(val)
      output.append(row)

    return output
4949
4950
class LUFailoverInstance(LogicalUnit):
  """Fail an instance over to its secondary node.

  The instance is shut down on its current primary node, the
  configuration is updated so that the first secondary node becomes the
  primary, and the instance is started there if it is marked as up.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Fill in optional opcode arguments.

    The shutdown timeout is taken from the opcode when present and from
    C{constants.DEFAULT_SHUTDOWN_TIMEOUT} otherwise.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    default_timeout)

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declaration.

    The node list starts empty and is replaced in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the locks on the instance's nodes at node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment and node lists.

    The pre-hooks node list holds the master and the instance's
    secondary nodes; the post-hooks list additionally holds the old
    primary node.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes)

    """
    inst = self.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "OLD_SECONDARY": new_primary,
      "NEW_PRIMARY": new_primary,
      "NEW_SECONDARY": old_primary,
      }
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]
    return env, pre_nodes, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the instance uses a network mirrored disk template,
    that its first secondary node is online and not drained, that this
    node has enough free memory (only if the instance is marked up) and
    that the instance's bridges exist on it.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    filled_be = self.cfg.GetClusterInfo().FillBE(inst)
    if inst.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondaries = inst.secondary_nodes
    if not secondaries:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    new_primary = secondaries[0]
    _CheckNodeOnline(self, new_primary)
    _CheckNodeNotDrained(self, new_primary)

    if not inst.admin_up:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # the instance will be started, so the secondary must be able to
      # hold its memory
      reason = "failing over instance %s" % inst.name
      _CheckNodeFreeMemory(self, new_primary, reason,
                           filled_be[constants.BE_MEMORY], inst.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, inst, node=new_primary)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    @param feedback_fn: function used to report progress to the caller

    """
    inst = self.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    if not inst.admin_up:
      feedback_fn("* not checking disk consistency as instance is not running")
    else:
      feedback_fn("* checking disk consistency between source and target")
      for disk in inst.disks:
        # for drbd, these are drbd over lvm
        if _CheckDiskConsistency(self, disk, new_primary, False):
          continue
        if not self.op.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % disk.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 inst.name, old_primary)

    shutdown_res = self.rpc.call_instance_shutdown(old_primary, inst,
                                                   self.shutdown_timeout)
    shutdown_err = shutdown_res.fail_msg
    if shutdown_err:
      if not self.op.ignore_consistency:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, old_primary, shutdown_err))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           inst.name, old_primary, old_primary, shutdown_err)

    feedback_fn("* deactivating the instance's disks on source node")
    disks_down = _ShutdownInstanceDisks(self, inst, ignore_primary=True)
    if not disks_down:
      raise errors.OpExecError("Can't shut down the instance's disks.")

    # record the new primary and distribute the new instance config to
    # the other nodes
    inst.primary_node = new_primary
    self.cfg.Update(inst, feedback_fn)

    # Only start the instance if it's marked as up
    if not inst.admin_up:
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s", inst.name, new_primary)

    disks_ok, _ = _AssembleInstanceDisks(self, inst, ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    start_res = self.rpc.call_instance_start(new_primary, inst, None, None)
    start_err = start_res.fail_msg
    if start_err:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (inst.name, new_primary, start_err))
5103
5104
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  In contrast to a failover, which shuts the instance down first, a
  migration is done without shutting the instance down. The work itself
  is delegated to a L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and register the migration tasklet.

    The node-level lock list starts empty and is replaced in
    L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()

    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 self.op.live, self.op.cleanup)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Declare the locks on the instance's nodes at node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment and node lists.

    The pre-hooks node list holds the master and the instance's
    secondary nodes; the post-hooks list additionally holds the old
    primary node.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes)

    """
    inst = self._migrater.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    env = _BuildInstanceHookEnvByObject(self, inst)
    env.update({
      "MIGRATE_LIVE": self.op.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": old_primary,
      "OLD_SECONDARY": new_primary,
      "NEW_PRIMARY": new_primary,
      "NEW_SECONDARY": old_primary,
      })

    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]
    return env, pre_nodes, post_nodes
5154
5155
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its primary node, new disks are created
  on the target node, the data is copied over, the configuration is
  switched to the target node, the old disks are removed and, if the
  instance is marked as up, it is started on the target node.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Both optional arguments get a default when the opcode does not
    carry them: the shutdown timeout defaults to
    C{constants.DEFAULT_SHUTDOWN_TIMEOUT} and C{ignore_consistency}
    defaults to False.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
    # "ignore_consistency" is not listed in _OP_REQP, so the opcode is not
    # guaranteed to have it; reading it unguarded in Exec would turn a
    # failed shutdown into an AttributeError
    self.ignore_consistency = getattr(self.op, "ignore_consistency", False)

  def ExpandNames(self):
    """Lock the instance and the (expanded) target node.

    The instance's primary node is appended to the node locks in
    L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the instance's primary node to the node-level locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the instance's primary node and the target
    node; the same node list is used for pre and post hooks.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes)

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and the target node are in the
    cluster, that the target differs from the current primary node, that
    all disks are plain logical volumes or files, that the target node is
    online and not drained, that it has enough free memory (only if the
    instance is marked as up) and that the instance's bridges exist on it.

    @raise errors.OpPrereqError: if the instance already lives on the
        target node or one of its disks has a layout that cannot be copied

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # only disks that are directly an LV or a file can be copied
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @param feedback_fn: function used to report progress to the caller
    @raise errors.OpExecError: if the instance cannot be shut down (and
        C{ignore_consistency} is not set), if the disk copy fails, or if
        the instance or its disks cannot be started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # use the value defaulted in CheckArguments; the opcode itself may
      # not have the attribute
      if self.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # always release the minors, then propagate the failure
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        # the first failure aborts the whole copy
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # raising from the finally clause means the copy error is what
        # gets reported, even if the disk removal itself failed
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # the data is on the target node now, make it the primary
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
5336
5337
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  One L{TLMigrateInstance} tasklet is created for every instance that
  has the given node as its primary node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node name, create the tasklets and declare the locks.

    The node itself and all its primary instances are locked; the nodes
    of those instances are appended in L{DeclareLocks}.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.needed_locks = {
      locking.LEVEL_NODE: [node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # One migration tasklet (never in cleanup mode) per primary instance
    instance_names = []
    migrations = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      instance_names.append(inst.name)
      migrations.append(TLMigrateInstance(self, inst.name,
                                          self.op.live, False))

    self.tasklets = migrations

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_names

  def DeclareLocks(self, level):
    """Declare the locks on the instances' nodes at node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment and node lists.

    The hooks are run on the master node only, both before and after
    the operation.

    @return: tuple of (environment dict, pre-hook nodes, post-hook nodes)

    """
    env = {"NODE_NAME": self.op.node_name}
    master_only = [self.cfg.GetMasterNode()]
    return (env, master_only, master_only)
5388
5389
class TLMigrateInstance(Tasklet):
  """Tasklet migrating an instance from its primary to its secondary node.

  Depending on the C{cleanup} flag, L{Exec} either performs the
  migration (L{_ExecMigration}) or tries to recover from a previously
  failed one (L{_ExecCleanup}).

  """
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    @param lu: the logical unit on whose behalf this tasklet runs
    @param instance_name: name of the instance to migrate
    @param live: passed on unchanged to the instance migration RPC call
    @param cleanup: if true, L{Exec} runs the cleanup procedure instead
        of a migration

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, uses the drbd8 disk
    template and has a secondary node, that the secondary node has
    enough free memory and the needed bridges and, unless in cleanup
    mode, that the secondary node is not drained and the primary node
    reports the instance as migratable.

    On success the instance object is stored in C{self.instance}.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    # NOTE(review): this check is also done in cleanup mode -- confirm
    # that this is intended
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    # the remaining checks only apply to a real migration, not to cleanup
    if not self.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call: the nodes are polled every
    two seconds until all of them report being done, and the lowest
    reported percentage is passed to the feedback function.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        # we are finished only when every node is finished
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        # only report progress if at least one node gave a value below 100
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    @param node: the node on which the instance's disks are closed

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    # presumably sets up the node-specific disk IDs needed by the RPC
    # call below -- verify against the configuration code
    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    The disconnect is requested on all nodes in C{self.all_nodes}; a
    failure on any of them is raised.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    @param multimaster: passed on to the attach RPC call; true selects
        what is reported to the user as "dual-master" mode, false as
        "single-master" mode

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    @raise errors.OpExecError: if the instance is found running on both
        nodes or on neither of them

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    # from here on the instance runs on exactly one of the two nodes; the
    # other one is the node to be demoted
    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    The target node is demoted to secondary and the disks are
    reconnected in single-master mode. A failure to do so is only
    logged as a warning, not raised.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    The finalize RPC is sent to the target node with its last argument
    set to False; a failure is logged but not raised.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    @raise errors.OpExecError: if a disk is degraded on the target node,
        or if fetching the migration information, the pre-migration, the
        migration itself or its finalization fails

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    # kept on self as well, since _AbortMigration reads it from there
    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    # NOTE(review): fixed 10 second pause before the migration call; the
    # reason is not visible in this code -- confirm whether it is needed
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    # NOTE(review): another fixed 10 second pause, after a successful
    # migration call -- confirm whether it is needed
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      # the configuration already points to the target node at this point
      # and the disks are left as they are
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    # demote the old primary and go back to single-master mode
    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    Stores the feedback function and the source/target node data used
    by the helper methods, then runs either the cleanup or the migration
    procedure, depending on C{self.cleanup}.

    @param feedback_fn: function used to report progress to the caller

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    # map each node to its secondary IP, as passed to the drbd and
    # migration RPC calls
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()
5719
5720
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Recursively create a block device and its children on a node.

  The children are always visited first. The device itself is only
  created when creation is forced, either by the caller or because
  the device (or one of its ancestors) reports that it has to be
  created on secondaries.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; it
      is switched to True for a device (and all of its descendants)
      whose CreateOnSecondary() method returns a true value
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    create_here = True
  else:
    create_here = force_create

  # children first, so that they exist when the parent is created
  for child in device.children or []:
    _CreateBlockDev(lu, node, instance, child, create_here,
                    info, force_open)

  if create_here:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5761
5762
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create exactly one block device on a given node.

  No recursion over the children of the device is done here, which
  means they must already exist when this function is called.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create; if its physical_id is still
      unset, it is filled in from the payload of the RPC result
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  creation = lu.rpc.call_blockdev_create(node, device, device.size,
                                         instance.name, force_open, info)
  failure_text = ("Can't create block device %s on node %s for instance %s" %
                  (device, node, instance.name))
  creation.Raise(failure_text)

  # an already known physical ID is never overwritten
  if device.physical_id is not None:
    return
  device.physical_id = creation.payload
5791
5792
def _GenerateUniqueNames(lu, exts):
  """Generate one unique name for each of the given suffixes.

  For every suffix a new unique ID is requested from the configuration
  (on behalf of the current execution context) and the suffix is
  appended to it.

  @param lu: the lu on whose behalf we execute
  @param exts: the suffixes to append, one per name to generate
  @rtype: list
  @return: the generated names, in the order of the suffixes

  """
  return ["%s%s" % (lu.cfg.GenerateUniqueID(lu.proc.GetECId()), suffix)
          for suffix in exts]
5804
5805
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Build a drbd8 disk object together with its two LV children.

  A port and a shared secret are requested from the configuration;
  together with the nodes and the minors they make up the logical ID
  of the drbd device.

  @param lu: the lu on whose behalf we execute
  @param primary: the primary node of the device
  @param secondary: the secondary node of the device
  @param size: the size of the drbd device and of its data volume
  @param names: the LV names; the first one is used for the data
      volume and the second one for the metadata volume
  @param iv_name: the iv_name of the resulting drbd device
  @param p_minor: the drbd minor on the primary node
  @param s_minor: the drbd minor on the secondary node
  @rtype: L{objects.Disk}
  @return: the drbd device, having the two LVs as children

  """
  drbd_port = lu.cfg.AllocatePort()
  volume_group = lu.cfg.GetVGName()
  secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(volume_group, names[0]))
  # the metadata volume has a fixed size
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(volume_group, names[1]))

  drbd_id = (primary, secondary, drbd_port, p_minor, s_minor, secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)
5825
5826
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Compute the disk objects for an instance, based on its template.

  @param lu: the lu on whose behalf we execute
  @param template_name: the disk template (one of the constants.DT_*
      values handled below)
  @param instance_name: the name of the instance, used when allocating
      the drbd minors
  @param primary_node: the primary node of the instance
  @param secondary_nodes: the secondary nodes; must be empty for the
      plain and file templates and have exactly one entry for drbd8
  @param disk_info: list of dicts, each one having at least the "size"
      and "mode" keys
  @param file_storage_dir: the directory holding file-based disks
  @param file_driver: the driver to use for file-based disks
  @param base_index: the index of the first disk generated here; it is
      used for the iv_name and for the LV/file names
  @rtype: list
  @return: the L{objects.Disk} objects, one per entry in disk_info
  @raise errors.ProgrammerError: for an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []

  if template_name == constants.DT_DISKLESS:
    # nothing to generate
    return disks

  if template_name == constants.DT_PLAIN:
    if len(secondary_nodes) > 0:
      raise errors.ProgrammerError("Wrong template configuration")

    lv_names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + offset)
                                         for offset in range(disk_count)])
    for offset, spec in enumerate(disk_info):
      disks.append(objects.Disk(dev_type=constants.LD_LV,
                                size=spec["size"],
                                logical_id=(vgname, lv_names[offset]),
                                iv_name="disk/%d" % (base_index + offset),
                                mode=spec["mode"]))

  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk: one on the primary, one on the secondary
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    lv_prefixes = _GenerateUniqueNames(lu, [".disk%d" % (base_index + offset)
                                            for offset in range(disk_count)])
    for offset, spec in enumerate(disk_info):
      prefix = lv_prefixes[offset]
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      spec["size"],
                                      [prefix + "_data", prefix + "_meta"],
                                      "disk/%d" % (base_index + offset),
                                      minors[2 * offset],
                                      minors[2 * offset + 1])
      drbd_dev.mode = spec["mode"]
      disks.append(drbd_dev)

  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) > 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for offset, spec in enumerate(disk_info):
      disk_index = base_index + offset
      file_path = "%s/disk%d" % (file_storage_dir, disk_index)
      disks.append(objects.Disk(dev_type=constants.LD_FILE,
                                size=spec["size"],
                                iv_name="disk/%d" % disk_index,
                                logical_id=(file_driver, file_path),
                                mode=spec["mode"]))

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)

  return disks
5893
5894
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  @param instance: the instance owning the disk; only its name is used
  @rtype: string
  @return: the instance name, prefixed with "originstname+"

  """
  owner_name = instance.name
  return "originstname+%s" % owner_name
5900
5901
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all the disks of an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of disk indices which must not be created
  @type target_node: string
  @param target_node: if passed, the disks are created only on this
      node, which is then treated as the primary one
  @return: None; problems are reported via the checks done on the RPC
      results (here and in L{_CreateBlockDev}), not via a return value

  """
  disk_tag = _GetInstanceInfoText(instance)
  if target_node is not None:
    creation_primary = target_node
    creation_nodes = [creation_primary]
  else:
    creation_primary = instance.primary_node
    creation_nodes = instance.all_nodes

  if instance.disk_template == constants.DT_FILE:
    # all the files live in the directory of the first disk
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    mkdir_result = lu.rpc.call_file_storage_dir_create(creation_primary,
                                                       storage_dir)
    mkdir_result.Raise("Failed to create directory '%s' on node %s" %
                       (storage_dir, creation_primary))

  skipped = to_skip or ()
  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if idx in skipped:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in creation_nodes:
      # creation and opening are only forced on the primary node
      on_primary = (node == creation_primary)
      _CreateBlockDev(lu, node, instance, device, on_primary, disk_tag,
                      on_primary)
5945
5946
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For file-based instances, the directory which holds the disk files
  is removed too, after the disks themselves.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: True if everything was removed, False if at least one of
      the removals failed (a warning is logged for each failure)

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      # only the top-level device, and only on the given node
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = target_node or instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the removal was actually attempted on, which is
      # not the primary node when target_node has been given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
5994
5995
def _ComputeDiskSize(disk_template, disks):
  """Compute the space needed in the volume group for the given disks.

  @param disk_template: the disk template of the instance
  @param disks: the disk definitions, each one a dict with a "size" key
  @return: the required size for the plain and drbd8 templates, None
      for the diskless and file templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  # both totals are computed up-front, whatever the template is
  plain_total = sum(d["size"] for d in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_total = sum(d["size"] + 128 for d in disks)

  # Required free disk space as a function of disk and swap space
  required = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
  }

  if disk_template in required:
    return required[disk_template]

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" % disk_template)
6014
6015
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Validate hypervisor parameters on a list of nodes.

  This function abstracts the hypervisor parameter validation, so that
  it can be used in both instance create and instance modify. A single
  RPC call is made for all the nodes; the answers of the nodes marked
  as offline are ignored.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  answers = lu.rpc.call_hypervisor_validate_params(nodenames, hvname,
                                                   hvparams)
  for node in nodenames:
    answer = answers[node]
    if not answer.offline:
      answer.Raise("Hypervisor parameter validation failed on node %s" % node)
6041
6042
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  The instance is either created from scratch or imported from an
  export, depending on the mode given in the opcode.

  """
  # HPATH and HTYPE have to be redefined by every LogicalUnit subclass
  # NOTE(review): judging by the names, these are the hooks path and the
  # hooks type -- confirm against the LogicalUnit base class
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): presumably the opcode attributes which must always be
  # present; CheckArguments() fills in defaults for the optional ones --
  # confirm against the LogicalUnit base class
  _OP_REQP = ["instance_name", "disks",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  # NOTE(review): presumably tells that this LU does not need the big
  # cluster-wide lock and declares its own locks in ExpandNames() --
  # confirm against the LogicalUnit base class
  REQ_BGL = False
6054
6055   def CheckArguments(self):
6056     """Check arguments.
6057
6058     """
6059     # set optional parameters to none if they don't exist
6060     for attr in ["pnode", "snode", "iallocator", "hypervisor",
6061                  "disk_template", "identify_defaults"]:
6062       if not hasattr(self.op, attr):
6063         setattr(self.op, attr, None)
6064
6065     # do not require name_check to ease forward/backward compatibility
6066     # for tools
6067     if not hasattr(self.op, "name_check"):
6068       self.op.name_check = True
6069     if not hasattr(self.op, "no_install"):
6070       self.op.no_install = False
6071     if self.op.no_install and self.op.start:
6072       self.LogInfo("No-installation mode selected, disabling startup")
6073       self.op.start = False
6074     # validate/normalize the instance name
6075     self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6076     if self.op.ip_check and not self.op.name_check:
6077       # TODO: make the ip check more flexible and not depend on the name check
6078       raise errors.OpPrereqError("Cannot do ip checks without a name check",
6079                                  errors.ECODE_INVAL)
6080     # check disk information: either all adopt, or no adopt
6081     has_adopt = has_no_adopt = False
6082     for disk in self.op.disks:
6083       if "adopt" in disk:
6084         has_adopt = True
6085       else:
6086         has_no_adopt = True
6087     if has_adopt and has_no_adopt:
6088       raise errors.OpPrereqError("Either all disks are adopted or none is",
6089                                  errors.ECODE_INVAL)
6090     if has_adopt:
6091       if self.op.disk_template != constants.DT_PLAIN:
6092         raise errors.OpPrereqError("Disk adoption is only supported for the"
6093                                    " 'plain' disk template",
6094                                    errors.ECODE_INVAL)
6095       if self.op.iallocator is not None:
6096         raise errors.OpPrereqError("Disk adoption not allowed with an"
6097                                    " iallocator script", errors.ECODE_INVAL)
6098       if self.op.mode == constants.INSTANCE_IMPORT:
6099         raise errors.OpPrereqError("Disk adoption not allowed for"
6100                                    " instance import", errors.ECODE_INVAL)
6101
6102     self.adopt_disks = has_adopt
6103
6104     # verify creation mode
6105     if self.op.mode not in (constants.INSTANCE_CREATE,
6106                             constants.INSTANCE_IMPORT):
6107       raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6108                                  self.op.mode, errors.ECODE_INVAL)
6109
6110     # instance name verification
6111     if self.op.name_check:
6112       self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6113       self.op.instance_name = self.hostname1.name
6114       # used in CheckPrereq for ip ping check
6115       self.check_ip = self.hostname1.ip
6116     else:
6117       self.check_ip = None
6118
6119     # file storage checks
6120     if (self.op.file_driver and
6121         not self.op.file_driver in constants.FILE_DRIVER):
6122       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6123                                  self.op.file_driver, errors.ECODE_INVAL)
6124
6125     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6126       raise errors.OpPrereqError("File storage directory path not absolute",
6127                                  errors.ECODE_INVAL)
6128
6129     ### Node/iallocator related checks
6130     if [self.op.iallocator, self.op.pnode].count(None) != 1:
6131       raise errors.OpPrereqError("One and only one of iallocator and primary"
6132                                  " node must be given",
6133                                  errors.ECODE_INVAL)
6134
6135     if self.op.mode == constants.INSTANCE_IMPORT:
6136       # On import force_variant must be True, because if we forced it at
6137       # initial install, our only chance when importing it back is that it
6138       # works again!
6139       self.op.force_variant = True
6140
6141       if self.op.no_install:
6142         self.LogInfo("No-installation mode has no effect during import")
6143
6144     else: # INSTANCE_CREATE
6145       if getattr(self.op, "os_type", None) is None:
6146         raise errors.OpPrereqError("No guest OS specified",
6147                                    errors.ECODE_INVAL)
6148       self.op.force_variant = getattr(self.op, "force_variant", False)
6149       if self.op.disk_template is None:
6150         raise errors.OpPrereqError("No disk template specified",
6151                                    errors.ECODE_INVAL)
6152
6153   def ExpandNames(self):
6154     """ExpandNames for CreateInstance.
6155
6156     Figure out the right locks for instance creation.
6157
6158     """
6159     self.needed_locks = {}
6160
6161     instance_name = self.op.instance_name
6162     # this is just a preventive check, but someone might still add this
6163     # instance in the meantime, and creation will fail at lock-add time
6164     if instance_name in self.cfg.GetInstanceList():
6165       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6166                                  instance_name, errors.ECODE_EXISTS)
6167
6168     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6169
6170     if self.op.iallocator:
6171       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6172     else:
6173       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6174       nodelist = [self.op.pnode]
6175       if self.op.snode is not None:
6176         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6177         nodelist.append(self.op.snode)
6178       self.needed_locks[locking.LEVEL_NODE] = nodelist
6179
6180     # in case of import lock the source node too
6181     if self.op.mode == constants.INSTANCE_IMPORT:
6182       src_node = getattr(self.op, "src_node", None)
6183       src_path = getattr(self.op, "src_path", None)
6184
6185       if src_path is None:
6186         self.op.src_path = src_path = self.op.instance_name
6187
6188       if src_node is None:
6189         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6190         self.op.src_node = None
6191         if os.path.isabs(src_path):
6192           raise errors.OpPrereqError("Importing an instance from an absolute"
6193                                      " path requires a source node option.",
6194                                      errors.ECODE_INVAL)
6195       else:
6196         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6197         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6198           self.needed_locks[locking.LEVEL_NODE].append(src_node)
6199         if not os.path.isabs(src_path):
6200           self.op.src_path = src_path = \
6201             utils.PathJoin(constants.EXPORT_DIR, src_path)
6202
6203   def _RunAllocator(self):
6204     """Run the allocator based on input opcode.
6205
6206     """
6207     nics = [n.ToDict() for n in self.nics]
6208     ial = IAllocator(self.cfg, self.rpc,
6209                      mode=constants.IALLOCATOR_MODE_ALLOC,
6210                      name=self.op.instance_name,
6211                      disk_template=self.op.disk_template,
6212                      tags=[],
6213                      os=self.op.os_type,
6214                      vcpus=self.be_full[constants.BE_VCPUS],
6215                      mem_size=self.be_full[constants.BE_MEMORY],
6216                      disks=self.disks,
6217                      nics=nics,
6218                      hypervisor=self.op.hypervisor,
6219                      )
6220
6221     ial.Run(self.op.iallocator)
6222
6223     if not ial.success:
6224       raise errors.OpPrereqError("Can't compute nodes using"
6225                                  " iallocator '%s': %s" %
6226                                  (self.op.iallocator, ial.info),
6227                                  errors.ECODE_NORES)
6228     if len(ial.result) != ial.required_nodes:
6229       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6230                                  " of nodes (%s), required %s" %
6231                                  (self.op.iallocator, len(ial.result),
6232                                   ial.required_nodes), errors.ECODE_FAULT)
6233     self.op.pnode = ial.result[0]
6234     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6235                  self.op.instance_name, self.op.iallocator,
6236                  utils.CommaJoin(ial.result))
6237     if ial.required_nodes == 2:
6238       self.op.snode = ial.result[1]
6239
6240   def BuildHooksEnv(self):
6241     """Build hooks env.
6242
6243     This runs on master, primary and secondary nodes of the instance.
6244
6245     """
6246     env = {
6247       "ADD_MODE": self.op.mode,
6248       }
6249     if self.op.mode == constants.INSTANCE_IMPORT:
6250       env["SRC_NODE"] = self.op.src_node
6251       env["SRC_PATH"] = self.op.src_path
6252       env["SRC_IMAGES"] = self.src_images
6253
6254     env.update(_BuildInstanceHookEnv(
6255       name=self.op.instance_name,
6256       primary_node=self.op.pnode,
6257       secondary_nodes=self.secondaries,
6258       status=self.op.start,
6259       os_type=self.op.os_type,
6260       memory=self.be_full[constants.BE_MEMORY],
6261       vcpus=self.be_full[constants.BE_VCPUS],
6262       nics=_NICListToTuple(self, self.nics),
6263       disk_template=self.op.disk_template,
6264       disks=[(d["size"], d["mode"]) for d in self.disks],
6265       bep=self.be_full,
6266       hvp=self.hv_full,
6267       hypervisor_name=self.op.hypervisor,
6268     ))
6269
6270     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6271           self.secondaries)
6272     return env, nl, nl
6273
6274   def _ReadExportInfo(self):
6275     """Reads the export information from disk.
6276
6277     It will override the opcode source node and path with the actual
6278     information, if these two were not specified before.
6279
6280     @return: the export information
6281
6282     """
6283     assert self.op.mode == constants.INSTANCE_IMPORT
6284
6285     src_node = self.op.src_node
6286     src_path = self.op.src_path
6287
6288     if src_node is None:
6289       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6290       exp_list = self.rpc.call_export_list(locked_nodes)
6291       found = False
6292       for node in exp_list:
6293         if exp_list[node].fail_msg:
6294           continue
6295         if src_path in exp_list[node].payload:
6296           found = True
6297           self.op.src_node = src_node = node
6298           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6299                                                        src_path)
6300           break
6301       if not found:
6302         raise errors.OpPrereqError("No export found for relative path %s" %
6303                                     src_path, errors.ECODE_INVAL)
6304
6305     _CheckNodeOnline(self, src_node)
6306     result = self.rpc.call_export_info(src_node, src_path)
6307     result.Raise("No export or invalid export found in dir %s" % src_path)
6308
6309     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6310     if not export_info.has_section(constants.INISECT_EXP):
6311       raise errors.ProgrammerError("Corrupted export config",
6312                                    errors.ECODE_ENVIRON)
6313
6314     ei_version = export_info.get(constants.INISECT_EXP, "version")
6315     if (int(ei_version) != constants.EXPORT_VERSION):
6316       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6317                                  (ei_version, constants.EXPORT_VERSION),
6318                                  errors.ECODE_ENVIRON)
6319     return export_info
6320
6321   def _ReadExportParams(self, einfo):
6322     """Use export parameters as defaults.
6323
6324     In case the opcode doesn't specify (as in override) some instance
6325     parameters, then try to use them from the export information, if
6326     that declares them.
6327
6328     """
6329     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6330
6331     if self.op.disk_template is None:
6332       if einfo.has_option(constants.INISECT_INS, "disk_template"):
6333         self.op.disk_template = einfo.get(constants.INISECT_INS,
6334                                           "disk_template")
6335       else:
6336         raise errors.OpPrereqError("No disk template specified and the export"
6337                                    " is missing the disk_template information",
6338                                    errors.ECODE_INVAL)
6339
6340     if not self.op.disks:
6341       if einfo.has_option(constants.INISECT_INS, "disk_count"):
6342         disks = []
6343         # TODO: import the disk iv_name too
6344         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6345           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6346           disks.append({"size": disk_sz})
6347         self.op.disks = disks
6348       else:
6349         raise errors.OpPrereqError("No disk info specified and the export"
6350                                    " is missing the disk information",
6351                                    errors.ECODE_INVAL)
6352
6353     if (not self.op.nics and
6354         einfo.has_option(constants.INISECT_INS, "nic_count")):
6355       nics = []
6356       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6357         ndict = {}
6358         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6359           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6360           ndict[name] = v
6361         nics.append(ndict)
6362       self.op.nics = nics
6363
6364     if (self.op.hypervisor is None and
6365         einfo.has_option(constants.INISECT_INS, "hypervisor")):
6366       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6367     if einfo.has_section(constants.INISECT_HYP):
6368       # use the export parameters but do not override the ones
6369       # specified by the user
6370       for name, value in einfo.items(constants.INISECT_HYP):
6371         if name not in self.op.hvparams:
6372           self.op.hvparams[name] = value
6373
6374     if einfo.has_section(constants.INISECT_BEP):
6375       # use the parameters, without overriding
6376       for name, value in einfo.items(constants.INISECT_BEP):
6377         if name not in self.op.beparams:
6378           self.op.beparams[name] = value
6379     else:
6380       # try to read the parameters old style, from the main section
6381       for name in constants.BES_PARAMETERS:
6382         if (name not in self.op.beparams and
6383             einfo.has_option(constants.INISECT_INS, name)):
6384           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6385
6386   def _RevertToDefaults(self, cluster):
6387     """Revert the instance parameters to the default values.
6388
6389     """
6390     # hvparams
6391     hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6392     for name in self.op.hvparams.keys():
6393       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6394         del self.op.hvparams[name]
6395     # beparams
6396     be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6397     for name in self.op.beparams.keys():
6398       if name in be_defs and be_defs[name] == self.op.beparams[name]:
6399         del self.op.beparams[name]
6400     # nic params
6401     nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6402     for nic in self.op.nics:
6403       for name in constants.NICS_PARAMETERS:
6404         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6405           del nic[name]
6406
6407   def CheckPrereq(self):
6408     """Check prerequisites.
6409
6410     """
6411     if self.op.mode == constants.INSTANCE_IMPORT:
6412       export_info = self._ReadExportInfo()
6413       self._ReadExportParams(export_info)
6414
6415     _CheckDiskTemplate(self.op.disk_template)
6416
6417     if (not self.cfg.GetVGName() and
6418         self.op.disk_template not in constants.DTS_NOT_LVM):
6419       raise errors.OpPrereqError("Cluster does not support lvm-based"
6420                                  " instances", errors.ECODE_STATE)
6421
6422     if self.op.hypervisor is None:
6423       self.op.hypervisor = self.cfg.GetHypervisorType()
6424
6425     cluster = self.cfg.GetClusterInfo()
6426     enabled_hvs = cluster.enabled_hypervisors
6427     if self.op.hypervisor not in enabled_hvs:
6428       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6429                                  " cluster (%s)" % (self.op.hypervisor,
6430                                   ",".join(enabled_hvs)),
6431                                  errors.ECODE_STATE)
6432
6433     # check hypervisor parameter syntax (locally)
6434     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6435     filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6436                                                         self.op.os_type),
6437                                   self.op.hvparams)
6438     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6439     hv_type.CheckParameterSyntax(filled_hvp)
6440     self.hv_full = filled_hvp
6441     # check that we don't specify global parameters on an instance
6442     _CheckGlobalHvParams(self.op.hvparams)
6443
6444     # fill and remember the beparams dict
6445     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6446     self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6447                                     self.op.beparams)
6448
6449     # now that hvp/bep are in final format, let's reset to defaults,
6450     # if told to do so
6451     if self.op.identify_defaults:
6452       self._RevertToDefaults(cluster)
6453
6454     # NIC buildup
6455     self.nics = []
6456     for idx, nic in enumerate(self.op.nics):
6457       nic_mode_req = nic.get("mode", None)
6458       nic_mode = nic_mode_req
6459       if nic_mode is None:
6460         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6461
6462       # in routed mode, for the first nic, the default ip is 'auto'
6463       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6464         default_ip_mode = constants.VALUE_AUTO
6465       else:
6466         default_ip_mode = constants.VALUE_NONE
6467
6468       # ip validity checks
6469       ip = nic.get("ip", default_ip_mode)
6470       if ip is None or ip.lower() == constants.VALUE_NONE:
6471         nic_ip = None
6472       elif ip.lower() == constants.VALUE_AUTO:
6473         if not self.op.name_check:
6474           raise errors.OpPrereqError("IP address set to auto but name checks"
6475                                      " have been skipped. Aborting.",
6476                                      errors.ECODE_INVAL)
6477         nic_ip = self.hostname1.ip
6478       else:
6479         if not utils.IsValidIP(ip):
6480           raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6481                                      " like a valid IP" % ip,
6482                                      errors.ECODE_INVAL)
6483         nic_ip = ip
6484
6485       # TODO: check the ip address for uniqueness
6486       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6487         raise errors.OpPrereqError("Routed nic mode requires an ip address",
6488                                    errors.ECODE_INVAL)
6489
6490       # MAC address verification
6491       mac = nic.get("mac", constants.VALUE_AUTO)
6492       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6493         mac = utils.NormalizeAndValidateMac(mac)
6494
6495         try:
6496           self.cfg.ReserveMAC(mac, self.proc.GetECId())
6497         except errors.ReservationError:
6498           raise errors.OpPrereqError("MAC address %s already in use"
6499                                      " in cluster" % mac,
6500                                      errors.ECODE_NOTUNIQUE)
6501
6502       # bridge verification
6503       bridge = nic.get("bridge", None)
6504       link = nic.get("link", None)
6505       if bridge and link:
6506         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6507                                    " at the same time", errors.ECODE_INVAL)
6508       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6509         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6510                                    errors.ECODE_INVAL)
6511       elif bridge:
6512         link = bridge
6513
6514       nicparams = {}
6515       if nic_mode_req:
6516         nicparams[constants.NIC_MODE] = nic_mode_req
6517       if link:
6518         nicparams[constants.NIC_LINK] = link
6519
6520       check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6521                                       nicparams)
6522       objects.NIC.CheckParameterSyntax(check_params)
6523       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6524
6525     # disk checks/pre-build
6526     self.disks = []
6527     for disk in self.op.disks:
6528       mode = disk.get("mode", constants.DISK_RDWR)
6529       if mode not in constants.DISK_ACCESS_SET:
6530         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6531                                    mode, errors.ECODE_INVAL)
6532       size = disk.get("size", None)
6533       if size is None:
6534         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6535       try:
6536         size = int(size)
6537       except (TypeError, ValueError):
6538         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6539                                    errors.ECODE_INVAL)
6540       new_disk = {"size": size, "mode": mode}
6541       if "adopt" in disk:
6542         new_disk["adopt"] = disk["adopt"]
6543       self.disks.append(new_disk)
6544
6545     if self.op.mode == constants.INSTANCE_IMPORT:
6546
6547       # Check that the new instance doesn't have less disks than the export
6548       instance_disks = len(self.disks)
6549       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6550       if instance_disks < export_disks:
6551         raise errors.OpPrereqError("Not enough disks to import."
6552                                    " (instance: %d, export: %d)" %
6553                                    (instance_disks, export_disks),
6554                                    errors.ECODE_INVAL)
6555
6556       disk_images = []
6557       for idx in range(export_disks):
6558         option = 'disk%d_dump' % idx
6559         if export_info.has_option(constants.INISECT_INS, option):
6560           # FIXME: are the old os-es, disk sizes, etc. useful?
6561           export_name = export_info.get(constants.INISECT_INS, option)
6562           image = utils.PathJoin(self.op.src_path, export_name)
6563           disk_images.append(image)
6564         else:
6565           disk_images.append(False)
6566
6567       self.src_images = disk_images
6568
6569       old_name = export_info.get(constants.INISECT_INS, 'name')
6570       try:
6571         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6572       except (TypeError, ValueError), err:
6573         raise errors.OpPrereqError("Invalid export file, nic_count is not"
6574                                    " an integer: %s" % str(err),
6575                                    errors.ECODE_STATE)
6576       if self.op.instance_name == old_name:
6577         for idx, nic in enumerate(self.nics):
6578           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6579             nic_mac_ini = 'nic%d_mac' % idx
6580             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6581
6582     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6583
6584     # ip ping checks (we use the same ip that was resolved in ExpandNames)
6585     if self.op.ip_check:
6586       if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6587         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6588                                    (self.check_ip, self.op.instance_name),
6589                                    errors.ECODE_NOTUNIQUE)
6590
6591     #### mac address generation
6592     # By generating here the mac address both the allocator and the hooks get
6593     # the real final mac address rather than the 'auto' or 'generate' value.
6594     # There is a race condition between the generation and the instance object
6595     # creation, which means that we know the mac is valid now, but we're not
6596     # sure it will be when we actually add the instance. If things go bad
6597     # adding the instance will abort because of a duplicate mac, and the
6598     # creation job will fail.
6599     for nic in self.nics:
6600       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6601         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6602
6603     #### allocator run
6604
6605     if self.op.iallocator is not None:
6606       self._RunAllocator()
6607
6608     #### node related checks
6609
6610     # check primary node
6611     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6612     assert self.pnode is not None, \
6613       "Cannot retrieve locked node %s" % self.op.pnode
6614     if pnode.offline:
6615       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6616                                  pnode.name, errors.ECODE_STATE)
6617     if pnode.drained:
6618       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6619                                  pnode.name, errors.ECODE_STATE)
6620
6621     self.secondaries = []
6622
6623     # mirror node verification
6624     if self.op.disk_template in constants.DTS_NET_MIRROR:
6625       if self.op.snode is None:
6626         raise errors.OpPrereqError("The networked disk templates need"
6627                                    " a mirror node", errors.ECODE_INVAL)
6628       if self.op.snode == pnode.name:
6629         raise errors.OpPrereqError("The secondary node cannot be the"
6630                                    " primary node.", errors.ECODE_INVAL)
6631       _CheckNodeOnline(self, self.op.snode)
6632       _CheckNodeNotDrained(self, self.op.snode)
6633       self.secondaries.append(self.op.snode)
6634
6635     nodenames = [pnode.name] + self.secondaries
6636
6637     req_size = _ComputeDiskSize(self.op.disk_template,
6638                                 self.disks)
6639
6640     # Check lv size requirements, if not adopting
6641     if req_size is not None and not self.adopt_disks:
6642       _CheckNodesFreeDisk(self, nodenames, req_size)
6643
6644     if self.adopt_disks: # instead, we must check the adoption data
6645       all_lvs = set([i["adopt"] for i in self.disks])
6646       if len(all_lvs) != len(self.disks):
6647         raise errors.OpPrereqError("Duplicate volume names given for adoption",
6648                                    errors.ECODE_INVAL)
6649       for lv_name in all_lvs:
6650         try:
6651           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6652         except errors.ReservationError:
6653           raise errors.OpPrereqError("LV named %s used by another instance" %
6654                                      lv_name, errors.ECODE_NOTUNIQUE)
6655
6656       node_lvs = self.rpc.call_lv_list([pnode.name],
6657                                        self.cfg.GetVGName())[pnode.name]
6658       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6659       node_lvs = node_lvs.payload
6660       delta = all_lvs.difference(node_lvs.keys())
6661       if delta:
6662         raise errors.OpPrereqError("Missing logical volume(s): %s" %
6663                                    utils.CommaJoin(delta),
6664                                    errors.ECODE_INVAL)
6665       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6666       if online_lvs:
6667         raise errors.OpPrereqError("Online logical volumes found, cannot"
6668                                    " adopt: %s" % utils.CommaJoin(online_lvs),
6669                                    errors.ECODE_STATE)
6670       # update the size of disk based on what is found
6671       for dsk in self.disks:
6672         dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6673
6674     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6675
6676     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6677
6678     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6679
6680     # memory check on primary node
6681     if self.op.start:
6682       _CheckNodeFreeMemory(self, self.pnode.name,
6683                            "creating instance %s" % self.op.instance_name,
6684                            self.be_full[constants.BE_MEMORY],
6685                            self.op.hypervisor)
6686
6687     self.dry_run_result = list(nodenames)
6688
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    The disks are either created or, when adopting, renamed from existing
    LVs; then the instance is added to the configuration, the no longer
    needed node locks are released, the disks are checked/synced, the OS
    create or import scripts are run and finally the instance is started
    if requested.

    @param feedback_fn: function used to send progress messages
    @return: the list of the new instance's nodes (from iobj.all_nodes)
    @raise errors.OpExecError: if the disks end up degraded (the instance
        is removed again in that case)

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    # a port is allocated only for the hypervisors listed in HTS_REQ_PORT
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""


    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    # the instance is created as administratively down; admin_up is only
    # switched on at the end of this function when self.op.start is set
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip (tmp_disks, self.disks):
        # remember the generated name, then point the copy at the LV
        # which is being adopted
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adoped LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          # the DRBD minors are released even if the disk removal fails
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      # for imports the lock on the source node is kept, all the other
      # node locks are released
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      # undo the creation: drop the disks and the configuration entry
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # the OS scripts are skipped for diskless instances and adopted disks
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        # FIXME: pass debug option from opcode to backend
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name,
                                                         self.op.debug_level)
        # unlike the create case, a failed import only produces a warning
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      # record the new admin state in the configuration before starting
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
6834
6835
class LUConnectConsole(NoHooksLU):
  """Build the command needed to reach an instance's console.

  This LU is a bit unusual: instead of acting on the instance it only
  returns the command line which must be run on the master node to get
  connected to the console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to the common instance helper.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must exist in the configuration and its primary node
    must pass the online check.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

  def Exec(self, feedback_fn):
    """Return the ssh command line connecting to the instance's console.

    @raise errors.OpExecError: if the instance is not reported as running
        on its primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    # ask the primary node which instances it runs for this hypervisor
    running = self.rpc.call_instance_list([pnode], [inst.hypervisor])[pnode]
    running.Raise("Can't get node information from %s" % pnode)
    if inst.name not in running.payload:
      raise errors.OpExecError("Instance %s is not running." % inst.name)

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    hv_impl = hypervisor.GetHypervisor(inst.hypervisor)
    cluster_info = self.cfg.GetClusterInfo()
    # The filled parameters are handed over as separate arguments so that
    # the instance object itself is not edited (and the defaults do not
    # end up being saved in it).
    filled_hv = cluster_info.FillHV(inst)
    filled_be = cluster_info.FillBE(inst)
    console_cmd = hv_impl.GetShellCommandForConsole(inst, filled_hv,
                                                    filled_be)

    # wrap the console command into an ssh invocation to the primary node
    return self.ssh.BuildCmd(pnode, "root", console_cmd,
                             batch=True, tty=True)
6887
6888
class LUReplaceDisks(LogicalUnit):
  """Logical unit replacing the disks of a single instance.

  The actual work is delegated to a TLReplaceDisks tasklet.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Fill in the optional opcode fields and validate their combination.

    """
    optional_fields = (
      ("remote_node", None),
      ("iallocator", None),
      ("early_release", False),
      )
    for field, default in optional_fields:
      if not hasattr(self.op, field):
        setattr(self.op, field, default)

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create the replace-disks tasklet.

    """
    self._ExpandAndLockInstance()

    node_level = locking.LEVEL_NODE
    if self.op.iallocator is not None:
      # the allocator may select any node
      self.needed_locks[node_level] = locking.ALL_SET
    else:
      if self.op.remote_node is None:
        wanted_nodes = []
        recalc_mode = constants.LOCKS_REPLACE
      else:
        new_secondary = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.op.remote_node = new_secondary

        # Warning: the new secondary has to stay locked here for as long
        # as DRBD8.AddChildren cannot work in parallel; right now
        # parallel invocations of FindUnusedMinor would conflict
        wanted_nodes = [new_secondary]
        recalc_mode = constants.LOCKS_APPEND

      self.needed_locks[node_level] = wanted_nodes
      self.recalculate_locks[node_level] = recalc_mode

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)
    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Add the instance's own nodes to the node locks, if needed.

    Nothing has to be done when all nodes are being locked anyway.

    """
    if level != locking.LEVEL_NODE:
      return
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The returned node list holds the master node, the instance's primary
    node and, when one was given, the new secondary node; the same list
    is returned for both node list positions.

    """
    inst = self.replacer.instance
    new_secondary = self.op.remote_node

    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": new_secondary,
      "OLD_SECONDARY": inst.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]
    if new_secondary is not None:
      hook_nodes.append(new_secondary)

    return env, hook_nodes, hook_nodes
6963
6964
class LUEvacuateNode(LogicalUnit):
  """Move away the instances using a node as their secondary.

  One TLReplaceDisks tasklet (in change-secondary mode) is created for
  every instance having the node as secondary.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Fill in the optional opcode fields and validate their combination.

    """
    optional_fields = (
      ("remote_node", None),
      ("iallocator", None),
      ("early_release", False),
      )
    for field, default in optional_fields:
      if not hasattr(self.op, field):
        setattr(self.op, field, default)

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the node/instance locks and create the tasklets.

    """
    op = self.op
    op.node_name = _ExpandNodeName(self.cfg, op.node_name)

    self.needed_locks = {}

    # Node locks: either everything (allocator) or the given new secondary
    if op.iallocator is None and op.remote_node is None:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    if op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      op.remote_node = _ExpandNodeName(self.cfg, op.remote_node)

      # Warning: the new secondary has to stay locked here for as long as
      # DRBD8.AddChildren cannot work in parallel; right now parallel
      # invocations of FindUnusedMinor would conflict
      self.needed_locks[locking.LEVEL_NODE] = [op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # One disk replacement tasklet per instance having this node as
    # secondary
    inst_names = []
    replacers = []
    for inst in _GetNodeSecondaryInstances(self.cfg, op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      inst_names.append(inst.name)
      replacers.append(TLReplaceDisks(self, inst.name,
                                      constants.REPLACE_DISK_CHG,
                                      op.iallocator, op.remote_node, [],
                                      True, op.early_release))

    self.tasklets = replacers
    self.instance_names = inst_names

    # Instance locks: all the instances which will be touched
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    """Add the instances' own nodes to the node locks, if needed.

    Nothing has to be done when all nodes are being locked anyway.

    """
    if level != locking.LEVEL_NODE:
      return
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The returned node list holds the master node and, when one was
    given, the new secondary node; the same list is returned for both
    node list positions.

    """
    new_secondary = self.op.remote_node

    env = {"NODE_NAME": self.op.node_name}
    hook_nodes = [self.cfg.GetMasterNode()]

    if new_secondary is not None:
      env["NEW_SECONDARY"] = new_secondary
      hook_nodes.append(new_secondary)

    return (env, hook_nodes, hook_nodes)
7052
7053
7054 class TLReplaceDisks(Tasklet):
7055   """Replaces disks for an instance.
7056
7057   Note: Locking is not within the scope of this class.
7058
7059   """
7060   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7061                disks, delay_iallocator, early_release):
7062     """Initializes this class.
7063
7064     """
7065     Tasklet.__init__(self, lu)
7066
7067     # Parameters
7068     self.instance_name = instance_name
7069     self.mode = mode
7070     self.iallocator_name = iallocator_name
7071     self.remote_node = remote_node
7072     self.disks = disks
7073     self.delay_iallocator = delay_iallocator
7074     self.early_release = early_release
7075
7076     # Runtime data
7077     self.instance = None
7078     self.new_node = None
7079     self.target_node = None
7080     self.other_node = None
7081     self.remote_node_info = None
7082     self.node_secondary_ip = None
7083
7084   @staticmethod
7085   def CheckArguments(mode, remote_node, iallocator):
7086     """Helper function for users of this class.
7087
7088     """
7089     # check for valid parameter combination
7090     if mode == constants.REPLACE_DISK_CHG:
7091       if remote_node is None and iallocator is None:
7092         raise errors.OpPrereqError("When changing the secondary either an"
7093                                    " iallocator script must be used or the"
7094                                    " new node given", errors.ECODE_INVAL)
7095
7096       if remote_node is not None and iallocator is not None:
7097         raise errors.OpPrereqError("Give either the iallocator or the new"
7098                                    " secondary, not both", errors.ECODE_INVAL)
7099
7100     elif remote_node is not None or iallocator is not None:
7101       # Not replacing the secondary
7102       raise errors.OpPrereqError("The iallocator and new node options can"
7103                                  " only be used when changing the"
7104                                  " secondary node", errors.ECODE_INVAL)
7105
7106   @staticmethod
7107   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7108     """Compute a new secondary node using an IAllocator.
7109
7110     """
7111     ial = IAllocator(lu.cfg, lu.rpc,
7112                      mode=constants.IALLOCATOR_MODE_RELOC,
7113                      name=instance_name,
7114                      relocate_from=relocate_from)
7115
7116     ial.Run(iallocator_name)
7117
7118     if not ial.success:
7119       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7120                                  " %s" % (iallocator_name, ial.info),
7121                                  errors.ECODE_NORES)
7122
7123     if len(ial.result) != ial.required_nodes:
7124       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7125                                  " of nodes (%s), required %s" %
7126                                  (iallocator_name,
7127                                   len(ial.result), ial.required_nodes),
7128                                  errors.ECODE_FAULT)
7129
7130     remote_node_name = ial.result[0]
7131
7132     lu.LogInfo("Selected new secondary for instance '%s': %s",
7133                instance_name, remote_node_name)
7134
7135     return remote_node_name
7136
7137   def _FindFaultyDisks(self, node_name):
7138     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7139                                     node_name, True)
7140
7141   def CheckPrereq(self):
7142     """Check prerequisites.
7143
7144     This checks that the instance is in the cluster.
7145
7146     """
7147     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7148     assert instance is not None, \
7149       "Cannot retrieve locked instance %s" % self.instance_name
7150
7151     if instance.disk_template != constants.DT_DRBD8:
7152       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7153                                  " instances", errors.ECODE_INVAL)
7154
7155     if len(instance.secondary_nodes) != 1:
7156       raise errors.OpPrereqError("The instance has a strange layout,"
7157                                  " expected one secondary but found %d" %
7158                                  len(instance.secondary_nodes),
7159                                  errors.ECODE_FAULT)
7160
7161     if not self.delay_iallocator:
7162       self._CheckPrereq2()
7163
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    The method determines the new secondary node (given explicitly or
    computed by the iallocator), validates it against the current primary
    and secondary nodes and then, depending on C{self.mode}, sets
    C{self.disks}, C{self.target_node}, C{self.other_node} and
    C{self.new_node}. Finally it checks that the involved nodes are online,
    that the selected disk indices exist and stores the secondary IP
    addresses of the involved nodes in C{self.node_secondary_ip}.

    @raise errors.OpPrereqError: if the new secondary is the current primary
        or secondary node, if disks were specified for a mode that does not
        accept them, or if both nodes have faulty disks in automatic mode
    @raise errors.ProgrammerError: if C{self.mode} is not a known mode

    """
    instance = self.instance
    # CheckPrereq has already verified that there is exactly one secondary
    secondary_node = instance.secondary_nodes[0]

    # The new secondary is either given explicitly or chosen by the iallocator
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    # An explicit disk list is rejected in automatic and change-secondary mode
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      # Automatic mode: the node to repair is the one reporting faulty disks
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty; Exec reports that no disks need replacement.
        # NOTE(review): target_node/other_node are not assigned on this path,
        # presumably they are initialised in the constructor - confirm
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        # The old secondary (target_node) is deliberately not in this list,
        # so it is not required to be online
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    # (the return value is unused, FindDisk is called only for validation)
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    # Node attributes which are still None are skipped
    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip
7278
7279   def Exec(self, feedback_fn):
7280     """Execute disk replacement.
7281
7282     This dispatches the disk replacement to the appropriate handler.
7283
7284     """
7285     if self.delay_iallocator:
7286       self._CheckPrereq2()
7287
7288     if not self.disks:
7289       feedback_fn("No disks need replacement")
7290       return
7291
7292     feedback_fn("Replacing disk(s) %s for %s" %
7293                 (utils.CommaJoin(self.disks), self.instance.name))
7294
7295     activate_disks = (not self.instance.admin_up)
7296
7297     # Activate the instance disks if we're replacing them on a down instance
7298     if activate_disks:
7299       _StartInstanceDisks(self.lu, self.instance, True)
7300
7301     try:
7302       # Should we replace the secondary node?
7303       if self.new_node is not None:
7304         fn = self._ExecDrbd8Secondary
7305       else:
7306         fn = self._ExecDrbd8DiskOnly
7307
7308       return fn(feedback_fn)
7309
7310     finally:
7311       # Deactivate the instance disks if we're replacing them on a
7312       # down instance
7313       if activate_disks:
7314         _SafeShutdownInstanceDisks(self.lu, self.instance)
7315
7316   def _CheckVolumeGroup(self, nodes):
7317     self.lu.LogInfo("Checking volume groups")
7318
7319     vgname = self.cfg.GetVGName()
7320
7321     # Make sure volume group exists on all involved nodes
7322     results = self.rpc.call_vg_list(nodes)
7323     if not results:
7324       raise errors.OpExecError("Can't list volume groups on the nodes")
7325
7326     for node in nodes:
7327       res = results[node]
7328       res.Raise("Error checking node %s" % node)
7329       if vgname not in res.payload:
7330         raise errors.OpExecError("Volume group '%s' not found on node %s" %
7331                                  (vgname, node))
7332
7333   def _CheckDisksExistence(self, nodes):
7334     # Check disk existence
7335     for idx, dev in enumerate(self.instance.disks):
7336       if idx not in self.disks:
7337         continue
7338
7339       for node in nodes:
7340         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7341         self.cfg.SetDiskID(dev, node)
7342
7343         result = self.rpc.call_blockdev_find(node, dev)
7344
7345         msg = result.fail_msg
7346         if msg or not result.payload:
7347           if not msg:
7348             msg = "disk not found"
7349           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7350                                    (idx, node, msg))
7351
7352   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7353     for idx, dev in enumerate(self.instance.disks):
7354       if idx not in self.disks:
7355         continue
7356
7357       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7358                       (idx, node_name))
7359
7360       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7361                                    ldisk=ldisk):
7362         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7363                                  " replace disks for instance %s" %
7364                                  (node_name, self.instance.name))
7365
7366   def _CreateNewStorage(self, node_name):
7367     vgname = self.cfg.GetVGName()
7368     iv_names = {}
7369
7370     for idx, dev in enumerate(self.instance.disks):
7371       if idx not in self.disks:
7372         continue
7373
7374       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7375
7376       self.cfg.SetDiskID(dev, node_name)
7377
7378       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7379       names = _GenerateUniqueNames(self.lu, lv_names)
7380
7381       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7382                              logical_id=(vgname, names[0]))
7383       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7384                              logical_id=(vgname, names[1]))
7385
7386       new_lvs = [lv_data, lv_meta]
7387       old_lvs = dev.children
7388       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7389
7390       # we pass force_create=True to force the LVM creation
7391       for new_lv in new_lvs:
7392         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7393                         _GetInstanceInfoText(self.instance), False)
7394
7395     return iv_names
7396
7397   def _CheckDevices(self, node_name, iv_names):
7398     for name, (dev, _, _) in iv_names.iteritems():
7399       self.cfg.SetDiskID(dev, node_name)
7400
7401       result = self.rpc.call_blockdev_find(node_name, dev)
7402
7403       msg = result.fail_msg
7404       if msg or not result.payload:
7405         if not msg:
7406           msg = "disk not found"
7407         raise errors.OpExecError("Can't find DRBD device %s: %s" %
7408                                  (name, msg))
7409
7410       if result.payload.is_degraded:
7411         raise errors.OpExecError("DRBD device %s is degraded!" % name)
7412
7413   def _RemoveOldStorage(self, node_name, iv_names):
7414     for name, (_, old_lvs, _) in iv_names.iteritems():
7415       self.lu.LogInfo("Remove logical volumes for %s" % name)
7416
7417       for lv in old_lvs:
7418         self.cfg.SetDiskID(lv, node_name)
7419
7420         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7421         if msg:
7422           self.lu.LogWarning("Can't remove old LV: %s" % msg,
7423                              hint="remove unused LVs manually")
7424
7425   def _ReleaseNodeLock(self, node_name):
7426     """Releases the lock for a given node."""
7427     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7428
7429   def _ExecDrbd8DiskOnly(self, feedback_fn):
7430     """Replace a disk on the primary or secondary for DRBD 8.
7431
7432     The algorithm for replace is quite complicated:
7433
7434       1. for each disk to be replaced:
7435
7436         1. create new LVs on the target node with unique names
7437         1. detach old LVs from the drbd device
7438         1. rename old LVs to name_replaced.<time_t>
7439         1. rename new LVs to old LVs
7440         1. attach the new LVs (with the old names now) to the drbd device
7441
7442       1. wait for sync across all devices
7443
7444       1. for each modified disk:
7445
7446         1. remove old LVs (which have the name name_replaces.<time_t>)
7447
7448     Failures are not very well handled.
7449
7450     """
7451     steps_total = 6
7452
7453     # Step: check device activation
7454     self.lu.LogStep(1, steps_total, "Check device existence")
7455     self._CheckDisksExistence([self.other_node, self.target_node])
7456     self._CheckVolumeGroup([self.target_node, self.other_node])
7457
7458     # Step: check other node consistency
7459     self.lu.LogStep(2, steps_total, "Check peer consistency")
7460     self._CheckDisksConsistency(self.other_node,
7461                                 self.other_node == self.instance.primary_node,
7462                                 False)
7463
7464     # Step: create new storage
7465     self.lu.LogStep(3, steps_total, "Allocate new storage")
7466     iv_names = self._CreateNewStorage(self.target_node)
7467
7468     # Step: for each lv, detach+rename*2+attach
7469     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7470     for dev, old_lvs, new_lvs in iv_names.itervalues():
7471       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7472
7473       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7474                                                      old_lvs)
7475       result.Raise("Can't detach drbd from local storage on node"
7476                    " %s for device %s" % (self.target_node, dev.iv_name))
7477       #dev.children = []
7478       #cfg.Update(instance)
7479
7480       # ok, we created the new LVs, so now we know we have the needed
7481       # storage; as such, we proceed on the target node to rename
7482       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7483       # using the assumption that logical_id == physical_id (which in
7484       # turn is the unique_id on that node)
7485
7486       # FIXME(iustin): use a better name for the replaced LVs
7487       temp_suffix = int(time.time())
7488       ren_fn = lambda d, suff: (d.physical_id[0],
7489                                 d.physical_id[1] + "_replaced-%s" % suff)
7490
7491       # Build the rename list based on what LVs exist on the node
7492       rename_old_to_new = []
7493       for to_ren in old_lvs:
7494         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7495         if not result.fail_msg and result.payload:
7496           # device exists
7497           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7498
7499       self.lu.LogInfo("Renaming the old LVs on the target node")
7500       result = self.rpc.call_blockdev_rename(self.target_node,
7501                                              rename_old_to_new)
7502       result.Raise("Can't rename old LVs on node %s" % self.target_node)
7503
7504       # Now we rename the new LVs to the old LVs
7505       self.lu.LogInfo("Renaming the new LVs on the target node")
7506       rename_new_to_old = [(new, old.physical_id)
7507                            for old, new in zip(old_lvs, new_lvs)]
7508       result = self.rpc.call_blockdev_rename(self.target_node,
7509                                              rename_new_to_old)
7510       result.Raise("Can't rename new LVs on node %s" % self.target_node)
7511
7512       for old, new in zip(old_lvs, new_lvs):
7513         new.logical_id = old.logical_id
7514         self.cfg.SetDiskID(new, self.target_node)
7515
7516       for disk in old_lvs:
7517         disk.logical_id = ren_fn(disk, temp_suffix)
7518         self.cfg.SetDiskID(disk, self.target_node)
7519
7520       # Now that the new lvs have the old name, we can add them to the device
7521       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7522       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7523                                                   new_lvs)
7524       msg = result.fail_msg
7525       if msg:
7526         for new_lv in new_lvs:
7527           msg2 = self.rpc.call_blockdev_remove(self.target_node,
7528                                                new_lv).fail_msg
7529           if msg2:
7530             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7531                                hint=("cleanup manually the unused logical"
7532                                      "volumes"))
7533         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7534
7535       dev.children = new_lvs
7536
7537       self.cfg.Update(self.instance, feedback_fn)
7538
7539     cstep = 5
7540     if self.early_release:
7541       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7542       cstep += 1
7543       self._RemoveOldStorage(self.target_node, iv_names)
7544       # WARNING: we release both node locks here, do not do other RPCs
7545       # than WaitForSync to the primary node
7546       self._ReleaseNodeLock([self.target_node, self.other_node])
7547
7548     # Wait for sync
7549     # This can fail as the old devices are degraded and _WaitForSync
7550     # does a combined result over all disks, so we don't check its return value
7551     self.lu.LogStep(cstep, steps_total, "Sync devices")
7552     cstep += 1
7553     _WaitForSync(self.lu, self.instance)
7554
7555     # Check all devices manually
7556     self._CheckDevices(self.instance.primary_node, iv_names)
7557
7558     # Step: remove old storage
7559     if not self.early_release:
7560       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7561       cstep += 1
7562       self._RemoveOldStorage(self.target_node, iv_names)
7563
7564   def _ExecDrbd8Secondary(self, feedback_fn):
7565     """Replace the secondary node for DRBD 8.
7566
7567     The algorithm for replace is quite complicated:
7568       - for all disks of the instance:
7569         - create new LVs on the new node with same names
7570         - shutdown the drbd device on the old secondary
7571         - disconnect the drbd network on the primary
7572         - create the drbd device on the new secondary
7573         - network attach the drbd on the primary, using an artifice:
7574           the drbd code for Attach() will connect to the network if it
7575           finds a device which is connected to the good local disks but
7576           not network enabled
7577       - wait for sync across all devices
7578       - remove all disks from the old secondary
7579
7580     Failures are not very well handled.
7581
7582     """
7583     steps_total = 6
7584
7585     # Step: check device activation
7586     self.lu.LogStep(1, steps_total, "Check device existence")
7587     self._CheckDisksExistence([self.instance.primary_node])
7588     self._CheckVolumeGroup([self.instance.primary_node])
7589
7590     # Step: check other node consistency
7591     self.lu.LogStep(2, steps_total, "Check peer consistency")
7592     self._CheckDisksConsistency(self.instance.primary_node, True, True)
7593
7594     # Step: create new storage
7595     self.lu.LogStep(3, steps_total, "Allocate new storage")
7596     for idx, dev in enumerate(self.instance.disks):
7597       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7598                       (self.new_node, idx))
7599       # we pass force_create=True to force LVM creation
7600       for new_lv in dev.children:
7601         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7602                         _GetInstanceInfoText(self.instance), False)
7603
7604     # Step 4: dbrd minors and drbd setups changes
7605     # after this, we must manually remove the drbd minors on both the
7606     # error and the success paths
7607     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7608     minors = self.cfg.AllocateDRBDMinor([self.new_node
7609                                          for dev in self.instance.disks],
7610                                         self.instance.name)
7611     logging.debug("Allocated minors %r", minors)
7612
7613     iv_names = {}
7614     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7615       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7616                       (self.new_node, idx))
7617       # create new devices on new_node; note that we create two IDs:
7618       # one without port, so the drbd will be activated without
7619       # networking information on the new node at this stage, and one
7620       # with network, for the latter activation in step 4
7621       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7622       if self.instance.primary_node == o_node1:
7623         p_minor = o_minor1
7624       else:
7625         assert self.instance.primary_node == o_node2, "Three-node instance?"
7626         p_minor = o_minor2
7627
7628       new_alone_id = (self.instance.primary_node, self.new_node, None,
7629                       p_minor, new_minor, o_secret)
7630       new_net_id = (self.instance.primary_node, self.new_node, o_port,
7631                     p_minor, new_minor, o_secret)
7632
7633       iv_names[idx] = (dev, dev.children, new_net_id)
7634       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7635                     new_net_id)
7636       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7637                               logical_id=new_alone_id,
7638                               children=dev.children,
7639                               size=dev.size)
7640       try:
7641         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7642                               _GetInstanceInfoText(self.instance), False)
7643       except errors.GenericError:
7644         self.cfg.ReleaseDRBDMinors(self.instance.name)
7645         raise
7646
7647     # We have new devices, shutdown the drbd on the old secondary
7648     for idx, dev in enumerate(self.instance.disks):
7649       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7650       self.cfg.SetDiskID(dev, self.target_node)
7651       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7652       if msg:
7653         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7654                            "node: %s" % (idx, msg),
7655                            hint=("Please cleanup this device manually as"
7656                                  " soon as possible"))
7657
7658     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7659     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7660                                                self.node_secondary_ip,
7661                                                self.instance.disks)\
7662                                               [self.instance.primary_node]
7663
7664     msg = result.fail_msg
7665     if msg:
7666       # detaches didn't succeed (unlikely)
7667       self.cfg.ReleaseDRBDMinors(self.instance.name)
7668       raise errors.OpExecError("Can't detach the disks from the network on"
7669                                " old node: %s" % (msg,))
7670
7671     # if we managed to detach at least one, we update all the disks of
7672     # the instance to point to the new secondary
7673     self.lu.LogInfo("Updating instance configuration")
7674     for dev, _, new_logical_id in iv_names.itervalues():
7675       dev.logical_id = new_logical_id
7676       self.cfg.SetDiskID(dev, self.instance.primary_node)
7677
7678     self.cfg.Update(self.instance, feedback_fn)
7679
7680     # and now perform the drbd attach
7681     self.lu.LogInfo("Attaching primary drbds to new secondary"
7682                     " (standalone => connected)")
7683     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7684                                             self.new_node],
7685                                            self.node_secondary_ip,
7686                                            self.instance.disks,
7687                                            self.instance.name,
7688                                            False)
7689     for to_node, to_result in result.items():
7690       msg = to_result.fail_msg
7691       if msg:
7692         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7693                            to_node, msg,
7694                            hint=("please do a gnt-instance info to see the"
7695                                  " status of disks"))
7696     cstep = 5
7697     if self.early_release:
7698       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7699       cstep += 1
7700       self._RemoveOldStorage(self.target_node, iv_names)
7701       # WARNING: we release all node locks here, do not do other RPCs
7702       # than WaitForSync to the primary node
7703       self._ReleaseNodeLock([self.instance.primary_node,
7704                              self.target_node,
7705                              self.new_node])
7706
7707     # Wait for sync
7708     # This can fail as the old devices are degraded and _WaitForSync
7709     # does a combined result over all disks, so we don't check its return value
7710     self.lu.LogStep(cstep, steps_total, "Sync devices")
7711     cstep += 1
7712     _WaitForSync(self.lu, self.instance)
7713
7714     # Check all devices manually
7715     self._CheckDevices(self.instance.primary_node, iv_names)
7716
7717     # Step: remove old storage
7718     if not self.early_release:
7719       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7720       self._RemoveOldStorage(self.target_node, iv_names)
7721
7722
7723 class LURepairNodeStorage(NoHooksLU):
7724   """Repairs the volume group on a node.
7725
7726   """
7727   _OP_REQP = ["node_name"]
7728   REQ_BGL = False
7729
7730   def CheckArguments(self):
7731     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7732
7733     _CheckStorageType(self.op.storage_type)
7734
7735   def ExpandNames(self):
7736     self.needed_locks = {
7737       locking.LEVEL_NODE: [self.op.node_name],
7738       }
7739
7740   def _CheckFaultyDisks(self, instance, node_name):
7741     """Ensure faulty disks abort the opcode or at least warn."""
7742     try:
7743       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7744                                   node_name, True):
7745         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7746                                    " node '%s'" % (instance.name, node_name),
7747                                    errors.ECODE_STATE)
7748     except errors.OpPrereqError, err:
7749       if self.op.ignore_consistency:
7750         self.proc.LogWarning(str(err.args[0]))
7751       else:
7752         raise
7753
7754   def CheckPrereq(self):
7755     """Check prerequisites.
7756
7757     """
7758     storage_type = self.op.storage_type
7759
7760     if (constants.SO_FIX_CONSISTENCY not in
7761         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7762       raise errors.OpPrereqError("Storage units of type '%s' can not be"
7763                                  " repaired" % storage_type,
7764                                  errors.ECODE_INVAL)
7765
7766     # Check whether any instance on this node has faulty disks
7767     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7768       if not inst.admin_up:
7769         continue
7770       check_nodes = set(inst.all_nodes)
7771       check_nodes.discard(self.op.node_name)
7772       for inst_node_name in check_nodes:
7773         self._CheckFaultyDisks(inst, inst_node_name)
7774
7775   def Exec(self, feedback_fn):
7776     feedback_fn("Repairing storage unit '%s' on %s ..." %
7777                 (self.op.name, self.op.node_name))
7778
7779     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7780     result = self.rpc.call_storage_execute(self.op.node_name,
7781                                            self.op.storage_type, st_args,
7782                                            self.op.name,
7783                                            constants.SO_FIX_CONSISTENCY)
7784     result.Raise("Failed to repair storage unit '%s' on %s" %
7785                  (self.op.name, self.op.node_name))
7786
7787
class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  The new secondary node is either given explicitly (C{remote_node}) or
  computed by an iallocator; giving both is an error.

  """
  _OP_REQP = ["nodes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional arguments and reject conflicting ones.

    """
    for optional in ("remote_node", "iallocator"):
      if not hasattr(self.op, optional):
        setattr(self.op, optional, None)

    if not (self.op.remote_node is None or self.op.iallocator is None):
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    """Expand the node names and compute the needed node locks.

    Without an explicit new secondary all nodes are locked, otherwise only
    the nodes to evacuate plus the new secondary.

    """
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = {}

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      node_locks = self.op.nodes + [self.op.remote_node]
    else:
      node_locks = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the evacuation plan.

    @return: with an explicit new secondary, a list of
        C{[instance name, new secondary]} pairs, one for each instance
        having one of the given nodes as secondary; otherwise the result
        computed by the iallocator

    """
    if self.op.remote_node is None:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      return ial.result

    # Collect all affected instances first, then validate them
    affected = []
    for node_name in self.op.nodes:
      affected.extend(_GetNodeSecondaryInstances(self.cfg, node_name))

    plan = []
    for inst in affected:
      if inst.primary_node == self.op.remote_node:
        raise errors.OpPrereqError("Node %s is the primary node of"
                                   " instance %s, cannot use it as"
                                   " secondary" %
                                   (self.op.remote_node, inst.name),
                                   errors.ECODE_INVAL)
      plan.append([inst.name, self.op.remote_node])

    return plan
7840
7841
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  The opcode names the instance, the disk to grow, the amount to grow it by
  and whether to wait for the disks to sync afterwards.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are filled in by DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks from the locked instance.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    @return: tuple of (environment dictionary, node list, node list); the
        same node list is returned twice

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (env, hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that all its nodes are
    online, that its disk template can be grown and that the requested disk
    exists. For non-file-based instances the free disk space on the nodes
    is checked as well.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node_names = list(inst.all_nodes)
    for node_name in node_names:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    if inst.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    if inst.disk_template == constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      return

    _CheckNodesFreeDisk(self, node_names, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    The disk is grown on every node of the instance, then the new size is
    recorded in the configuration. If requested, the method waits for the
    disks to sync and warns when the sync does not report a good status.

    """
    inst = self.instance
    grown_disk = self.disk
    amount = self.op.amount

    for node_name in inst.all_nodes:
      self.cfg.SetDiskID(grown_disk, node_name)
      grow_result = self.rpc.call_blockdev_grow(node_name, grown_disk, amount)
      grow_result.Raise("Grow request failed to node %s" % node_name)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    grown_disk.RecordGrow(amount)
    self.cfg.Update(inst, feedback_fn)

    if self.op.wait_for_sync and not _WaitForSync(self, inst):
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")
7926
7927
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  Returns, for each wanted instance, a dict with its configuration and
  (unless a static query was requested) its runtime and disk status.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the instance and node locks needed by the query.

    """
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    requested = self.op.instances
    if not isinstance(requested, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if not requested:
      # no explicit list: work on all instances, names are filled in later
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
    else:
      self.wanted_names = [_ExpandInstanceName(self.cfg, name)
                           for name in requested]
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

    # node locks are derived from the instances in DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves the wanted instance names (falling back to the acquired
    instance locks when no list was given) into instance objects.

    """
    names = self.wanted_names
    if names is None:
      names = self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(iname)
                             for iname in names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device on one node.

    @return: None for static queries, when no node is given, when the node
        is offline or when no status was returned; otherwise a tuple of
        (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
        ldisk_status)

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    find_res = self.rpc.call_blockdev_find(node, dev)
    if find_res.offline:
      return None

    find_res.Raise("Can't compute disk status for %s" % instance_name)

    bdev = find_res.payload
    if bdev is None:
      return None

    return (bdev.dev_path, bdev.major, bdev.minor,
            bdev.sync_percent, bdev.estimated_time,
            bdev.is_degraded, bdev.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status, recursing into child devices.

    @return: dict describing the device, its status on the primary and
        secondary node and the same data for all its children

    """
    if dev.dev_type in constants.LDS_DRBD:
      # for DRBD the secondary is whichever of the two nodes in the logical
      # id is not the primary (otherwise the node passed in is used)
      if dev.logical_id[0] != instance.primary_node:
        snode = dev.logical_id[0]
      else:
        snode = dev.logical_id[1]

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    children_status = [self._ComputeDiskStatus(instance, snode, child)
                       for child in (dev.children or [])]

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": children_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    cluster = self.cfg.GetClusterInfo()
    all_data = {}

    for inst in self.wanted_instances:
      if self.op.static:
        run_state = None
      else:
        info_res = self.rpc.call_instance_info(inst.primary_node,
                                               inst.name,
                                               inst.hypervisor)
        info_res.Raise("Error checking node %s" % inst.primary_node)
        payload = info_res.payload
        if payload and "state" in payload:
          run_state = "up"
        else:
          run_state = "down"

      if inst.admin_up:
        admin_state = "up"
      else:
        admin_state = "down"

      disk_data = [self._ComputeDiskStatus(inst, None, device)
                   for device in inst.disks]

      all_data[inst.name] = {
        "name": inst.name,
        "config_state": admin_state,
        "run_state": run_state,
        "pnode": inst.primary_node,
        "snodes": inst.secondary_nodes,
        "os": inst.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, inst.nics),
        "disks": disk_data,
        "hypervisor": inst.hypervisor,
        "network_port": inst.network_port,
        "hv_instance": inst.hvparams,
        "hv_actual": cluster.FillHV(inst, skip_globals=True),
        "be_instance": inst.beparams,
        "be_actual": cluster.FillBE(inst),
        "serial_no": inst.serial_no,
        "mtime": inst.mtime,
        "ctime": inst.ctime,
        "uuid": inst.uuid,
        }

    return all_data
8083
8084
8085 class LUSetInstanceParams(LogicalUnit):
8086   """Modifies an instances's parameters.
8087
8088   """
8089   HPATH = "instance-modify"
8090   HTYPE = constants.HTYPE_INSTANCE
8091   _OP_REQP = ["instance_name"]
8092   REQ_BGL = False
8093
8094   def CheckArguments(self):
8095     if not hasattr(self.op, 'nics'):
8096       self.op.nics = []
8097     if not hasattr(self.op, 'disks'):
8098       self.op.disks = []
8099     if not hasattr(self.op, 'beparams'):
8100       self.op.beparams = {}
8101     if not hasattr(self.op, 'hvparams'):
8102       self.op.hvparams = {}
8103     if not hasattr(self.op, "disk_template"):
8104       self.op.disk_template = None
8105     if not hasattr(self.op, "remote_node"):
8106       self.op.remote_node = None
8107     if not hasattr(self.op, "os_name"):
8108       self.op.os_name = None
8109     if not hasattr(self.op, "force_variant"):
8110       self.op.force_variant = False
8111     self.op.force = getattr(self.op, "force", False)
8112     if not (self.op.nics or self.op.disks or self.op.disk_template or
8113             self.op.hvparams or self.op.beparams or self.op.os_name):
8114       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8115
8116     if self.op.hvparams:
8117       _CheckGlobalHvParams(self.op.hvparams)
8118
8119     # Disk validation
8120     disk_addremove = 0
8121     for disk_op, disk_dict in self.op.disks:
8122       if disk_op == constants.DDM_REMOVE:
8123         disk_addremove += 1
8124         continue
8125       elif disk_op == constants.DDM_ADD:
8126         disk_addremove += 1
8127       else:
8128         if not isinstance(disk_op, int):
8129           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8130         if not isinstance(disk_dict, dict):
8131           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8132           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8133
8134       if disk_op == constants.DDM_ADD:
8135         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8136         if mode not in constants.DISK_ACCESS_SET:
8137           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8138                                      errors.ECODE_INVAL)
8139         size = disk_dict.get('size', None)
8140         if size is None:
8141           raise errors.OpPrereqError("Required disk parameter size missing",
8142                                      errors.ECODE_INVAL)
8143         try:
8144           size = int(size)
8145         except (TypeError, ValueError), err:
8146           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8147                                      str(err), errors.ECODE_INVAL)
8148         disk_dict['size'] = size
8149       else:
8150         # modification of disk
8151         if 'size' in disk_dict:
8152           raise errors.OpPrereqError("Disk size change not possible, use"
8153                                      " grow-disk", errors.ECODE_INVAL)
8154
8155     if disk_addremove > 1:
8156       raise errors.OpPrereqError("Only one disk add or remove operation"
8157                                  " supported at a time", errors.ECODE_INVAL)
8158
8159     if self.op.disks and self.op.disk_template is not None:
8160       raise errors.OpPrereqError("Disk template conversion and other disk"
8161                                  " changes not supported at the same time",
8162                                  errors.ECODE_INVAL)
8163
8164     if self.op.disk_template:
8165       _CheckDiskTemplate(self.op.disk_template)
8166       if (self.op.disk_template in constants.DTS_NET_MIRROR and
8167           self.op.remote_node is None):
8168         raise errors.OpPrereqError("Changing the disk template to a mirrored"
8169                                    " one requires specifying a secondary node",
8170                                    errors.ECODE_INVAL)
8171
8172     # NIC validation
8173     nic_addremove = 0
8174     for nic_op, nic_dict in self.op.nics:
8175       if nic_op == constants.DDM_REMOVE:
8176         nic_addremove += 1
8177         continue
8178       elif nic_op == constants.DDM_ADD:
8179         nic_addremove += 1
8180       else:
8181         if not isinstance(nic_op, int):
8182           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8183         if not isinstance(nic_dict, dict):
8184           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8185           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8186
8187       # nic_dict should be a dict
8188       nic_ip = nic_dict.get('ip', None)
8189       if nic_ip is not None:
8190         if nic_ip.lower() == constants.VALUE_NONE:
8191           nic_dict['ip'] = None
8192         else:
8193           if not utils.IsValidIP(nic_ip):
8194             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8195                                        errors.ECODE_INVAL)
8196
8197       nic_bridge = nic_dict.get('bridge', None)
8198       nic_link = nic_dict.get('link', None)
8199       if nic_bridge and nic_link:
8200         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8201                                    " at the same time", errors.ECODE_INVAL)
8202       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8203         nic_dict['bridge'] = None
8204       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8205         nic_dict['link'] = None
8206
8207       if nic_op == constants.DDM_ADD:
8208         nic_mac = nic_dict.get('mac', None)
8209         if nic_mac is None:
8210           nic_dict['mac'] = constants.VALUE_AUTO
8211
8212       if 'mac' in nic_dict:
8213         nic_mac = nic_dict['mac']
8214         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8215           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8216
8217         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8218           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8219                                      " modifying an existing nic",
8220                                      errors.ECODE_INVAL)
8221
8222     if nic_addremove > 1:
8223       raise errors.OpPrereqError("Only one NIC add or remove operation"
8224                                  " supported at a time", errors.ECODE_INVAL)
8225
8226   def ExpandNames(self):
8227     self._ExpandAndLockInstance()
8228     self.needed_locks[locking.LEVEL_NODE] = []
8229     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8230
8231   def DeclareLocks(self, level):
8232     if level == locking.LEVEL_NODE:
8233       self._LockInstancesNodes()
8234       if self.op.disk_template and self.op.remote_node:
8235         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8236         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8237
8238   def BuildHooksEnv(self):
8239     """Build hooks env.
8240
8241     This runs on the master, primary and secondaries.
8242
8243     """
8244     args = dict()
8245     if constants.BE_MEMORY in self.be_new:
8246       args['memory'] = self.be_new[constants.BE_MEMORY]
8247     if constants.BE_VCPUS in self.be_new:
8248       args['vcpus'] = self.be_new[constants.BE_VCPUS]
8249     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8250     # information at all.
8251     if self.op.nics:
8252       args['nics'] = []
8253       nic_override = dict(self.op.nics)
8254       c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8255       for idx, nic in enumerate(self.instance.nics):
8256         if idx in nic_override:
8257           this_nic_override = nic_override[idx]
8258         else:
8259           this_nic_override = {}
8260         if 'ip' in this_nic_override:
8261           ip = this_nic_override['ip']
8262         else:
8263           ip = nic.ip
8264         if 'mac' in this_nic_override:
8265           mac = this_nic_override['mac']
8266         else:
8267           mac = nic.mac
8268         if idx in self.nic_pnew:
8269           nicparams = self.nic_pnew[idx]
8270         else:
8271           nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8272         mode = nicparams[constants.NIC_MODE]
8273         link = nicparams[constants.NIC_LINK]
8274         args['nics'].append((ip, mac, mode, link))
8275       if constants.DDM_ADD in nic_override:
8276         ip = nic_override[constants.DDM_ADD].get('ip', None)
8277         mac = nic_override[constants.DDM_ADD]['mac']
8278         nicparams = self.nic_pnew[constants.DDM_ADD]
8279         mode = nicparams[constants.NIC_MODE]
8280         link = nicparams[constants.NIC_LINK]
8281         args['nics'].append((ip, mac, mode, link))
8282       elif constants.DDM_REMOVE in nic_override:
8283         del args['nics'][-1]
8284
8285     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8286     if self.op.disk_template:
8287       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8288     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8289     return env, nl, nl
8290
8291   @staticmethod
8292   def _GetUpdatedParams(old_params, update_dict,
8293                         default_values, parameter_types):
8294     """Return the new params dict for the given params.
8295
8296     @type old_params: dict
8297     @param old_params: old parameters
8298     @type update_dict: dict
8299     @param update_dict: dict containing new parameter values,
8300                         or constants.VALUE_DEFAULT to reset the
8301                         parameter to its default value
8302     @type default_values: dict
8303     @param default_values: default values for the filled parameters
8304     @type parameter_types: dict
8305     @param parameter_types: dict mapping target dict keys to types
8306                             in constants.ENFORCEABLE_TYPES
8307     @rtype: (dict, dict)
8308     @return: (new_parameters, filled_parameters)
8309
8310     """
8311     params_copy = copy.deepcopy(old_params)
8312     for key, val in update_dict.iteritems():
8313       if val == constants.VALUE_DEFAULT:
8314         try:
8315           del params_copy[key]
8316         except KeyError:
8317           pass
8318       else:
8319         params_copy[key] = val
8320     utils.ForceDictType(params_copy, parameter_types)
8321     params_filled = objects.FillDict(default_values, params_copy)
8322     return (params_copy, params_filled)
8323
8324   def CheckPrereq(self):
8325     """Check prerequisites.
8326
8327     This only checks the instance list against the existing names.
8328
8329     """
8330     self.force = self.op.force
8331
8332     # checking the new params on the primary/secondary nodes
8333
8334     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8335     cluster = self.cluster = self.cfg.GetClusterInfo()
8336     assert self.instance is not None, \
8337       "Cannot retrieve locked instance %s" % self.op.instance_name
8338     pnode = instance.primary_node
8339     nodelist = list(instance.all_nodes)
8340
8341     if self.op.disk_template:
8342       if instance.disk_template == self.op.disk_template:
8343         raise errors.OpPrereqError("Instance already has disk template %s" %
8344                                    instance.disk_template, errors.ECODE_INVAL)
8345
8346       if (instance.disk_template,
8347           self.op.disk_template) not in self._DISK_CONVERSIONS:
8348         raise errors.OpPrereqError("Unsupported disk template conversion from"
8349                                    " %s to %s" % (instance.disk_template,
8350                                                   self.op.disk_template),
8351                                    errors.ECODE_INVAL)
8352       if self.op.disk_template in constants.DTS_NET_MIRROR:
8353         _CheckNodeOnline(self, self.op.remote_node)
8354         _CheckNodeNotDrained(self, self.op.remote_node)
8355         disks = [{"size": d.size} for d in instance.disks]
8356         required = _ComputeDiskSize(self.op.disk_template, disks)
8357         _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8358         _CheckInstanceDown(self, instance, "cannot change disk template")
8359
8360     # hvparams processing
8361     if self.op.hvparams:
8362       i_hvdict, hv_new = self._GetUpdatedParams(
8363                              instance.hvparams, self.op.hvparams,
8364                              cluster.hvparams[instance.hypervisor],
8365                              constants.HVS_PARAMETER_TYPES)
8366       # local check
8367       hypervisor.GetHypervisor(
8368         instance.hypervisor).CheckParameterSyntax(hv_new)
8369       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8370       self.hv_new = hv_new # the new actual values
8371       self.hv_inst = i_hvdict # the new dict (without defaults)
8372     else:
8373       self.hv_new = self.hv_inst = {}
8374
8375     # beparams processing
8376     if self.op.beparams:
8377       i_bedict, be_new = self._GetUpdatedParams(
8378                              instance.beparams, self.op.beparams,
8379                              cluster.beparams[constants.PP_DEFAULT],
8380                              constants.BES_PARAMETER_TYPES)
8381       self.be_new = be_new # the new actual values
8382       self.be_inst = i_bedict # the new dict (without defaults)
8383     else:
8384       self.be_new = self.be_inst = {}
8385
8386     self.warn = []
8387
8388     if constants.BE_MEMORY in self.op.beparams and not self.force:
8389       mem_check_list = [pnode]
8390       if be_new[constants.BE_AUTO_BALANCE]:
8391         # either we changed auto_balance to yes or it was from before
8392         mem_check_list.extend(instance.secondary_nodes)
8393       instance_info = self.rpc.call_instance_info(pnode, instance.name,
8394                                                   instance.hypervisor)
8395       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8396                                          instance.hypervisor)
8397       pninfo = nodeinfo[pnode]
8398       msg = pninfo.fail_msg
8399       if msg:
8400         # Assume the primary node is unreachable and go ahead
8401         self.warn.append("Can't get info from primary node %s: %s" %
8402                          (pnode,  msg))
8403       elif not isinstance(pninfo.payload.get('memory_free', None), int):
8404         self.warn.append("Node data from primary node %s doesn't contain"
8405                          " free memory information" % pnode)
8406       elif instance_info.fail_msg:
8407         self.warn.append("Can't get instance runtime information: %s" %
8408                         instance_info.fail_msg)
8409       else:
8410         if instance_info.payload:
8411           current_mem = int(instance_info.payload['memory'])
8412         else:
8413           # Assume instance not running
8414           # (there is a slight race condition here, but it's not very probable,
8415           # and we have no other way to check)
8416           current_mem = 0
8417         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8418                     pninfo.payload['memory_free'])
8419         if miss_mem > 0:
8420           raise errors.OpPrereqError("This change will prevent the instance"
8421                                      " from starting, due to %d MB of memory"
8422                                      " missing on its primary node" % miss_mem,
8423                                      errors.ECODE_NORES)
8424
8425       if be_new[constants.BE_AUTO_BALANCE]:
8426         for node, nres in nodeinfo.items():
8427           if node not in instance.secondary_nodes:
8428             continue
8429           msg = nres.fail_msg
8430           if msg:
8431             self.warn.append("Can't get info from secondary node %s: %s" %
8432                              (node, msg))
8433           elif not isinstance(nres.payload.get('memory_free', None), int):
8434             self.warn.append("Secondary node %s didn't return free"
8435                              " memory information" % node)
8436           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8437             self.warn.append("Not enough memory to failover instance to"
8438                              " secondary node %s" % node)
8439
8440     # NIC processing
8441     self.nic_pnew = {}
8442     self.nic_pinst = {}
8443     for nic_op, nic_dict in self.op.nics:
8444       if nic_op == constants.DDM_REMOVE:
8445         if not instance.nics:
8446           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8447                                      errors.ECODE_INVAL)
8448         continue
8449       if nic_op != constants.DDM_ADD:
8450         # an existing nic
8451         if not instance.nics:
8452           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8453                                      " no NICs" % nic_op,
8454                                      errors.ECODE_INVAL)
8455         if nic_op < 0 or nic_op >= len(instance.nics):
8456           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8457                                      " are 0 to %d" %
8458                                      (nic_op, len(instance.nics) - 1),
8459                                      errors.ECODE_INVAL)
8460         old_nic_params = instance.nics[nic_op].nicparams
8461         old_nic_ip = instance.nics[nic_op].ip
8462       else:
8463         old_nic_params = {}
8464         old_nic_ip = None
8465
8466       update_params_dict = dict([(key, nic_dict[key])
8467                                  for key in constants.NICS_PARAMETERS
8468                                  if key in nic_dict])
8469
8470       if 'bridge' in nic_dict:
8471         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8472
8473       new_nic_params, new_filled_nic_params = \
8474           self._GetUpdatedParams(old_nic_params, update_params_dict,
8475                                  cluster.nicparams[constants.PP_DEFAULT],
8476                                  constants.NICS_PARAMETER_TYPES)
8477       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8478       self.nic_pinst[nic_op] = new_nic_params
8479       self.nic_pnew[nic_op] = new_filled_nic_params
8480       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8481
8482       if new_nic_mode == constants.NIC_MODE_BRIDGED:
8483         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8484         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8485         if msg:
8486           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8487           if self.force:
8488             self.warn.append(msg)
8489           else:
8490             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8491       if new_nic_mode == constants.NIC_MODE_ROUTED:
8492         if 'ip' in nic_dict:
8493           nic_ip = nic_dict['ip']
8494         else:
8495           nic_ip = old_nic_ip
8496         if nic_ip is None:
8497           raise errors.OpPrereqError('Cannot set the nic ip to None'
8498                                      ' on a routed nic', errors.ECODE_INVAL)
8499       if 'mac' in nic_dict:
8500         nic_mac = nic_dict['mac']
8501         if nic_mac is None:
8502           raise errors.OpPrereqError('Cannot set the nic mac to None',
8503                                      errors.ECODE_INVAL)
8504         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8505           # otherwise generate the mac
8506           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8507         else:
8508           # or validate/reserve the current one
8509           try:
8510             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8511           except errors.ReservationError:
8512             raise errors.OpPrereqError("MAC address %s already in use"
8513                                        " in cluster" % nic_mac,
8514                                        errors.ECODE_NOTUNIQUE)
8515
8516     # DISK processing
8517     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8518       raise errors.OpPrereqError("Disk operations not supported for"
8519                                  " diskless instances",
8520                                  errors.ECODE_INVAL)
8521     for disk_op, _ in self.op.disks:
8522       if disk_op == constants.DDM_REMOVE:
8523         if len(instance.disks) == 1:
8524           raise errors.OpPrereqError("Cannot remove the last disk of"
8525                                      " an instance", errors.ECODE_INVAL)
8526         _CheckInstanceDown(self, instance, "cannot remove disks")
8527
8528       if (disk_op == constants.DDM_ADD and
8529           len(instance.nics) >= constants.MAX_DISKS):
8530         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8531                                    " add more" % constants.MAX_DISKS,
8532                                    errors.ECODE_STATE)
8533       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8534         # an existing disk
8535         if disk_op < 0 or disk_op >= len(instance.disks):
8536           raise errors.OpPrereqError("Invalid disk index %s, valid values"
8537                                      " are 0 to %d" %
8538                                      (disk_op, len(instance.disks)),
8539                                      errors.ECODE_INVAL)
8540
8541     # OS change
8542     if self.op.os_name and not self.op.force:
8543       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8544                       self.op.force_variant)
8545
8546     return
8547
8548   def _ConvertPlainToDrbd(self, feedback_fn):
8549     """Converts an instance from plain to drbd.
8550
8551     """
8552     feedback_fn("Converting template to drbd")
8553     instance = self.instance
8554     pnode = instance.primary_node
8555     snode = self.op.remote_node
8556
8557     # create a fake disk info for _GenerateDiskTemplate
8558     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8559     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8560                                       instance.name, pnode, [snode],
8561                                       disk_info, None, None, 0)
8562     info = _GetInstanceInfoText(instance)
8563     feedback_fn("Creating aditional volumes...")
8564     # first, create the missing data and meta devices
8565     for disk in new_disks:
8566       # unfortunately this is... not too nice
8567       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8568                             info, True)
8569       for child in disk.children:
8570         _CreateSingleBlockDev(self, snode, instance, child, info, True)
8571     # at this stage, all new LVs have been created, we can rename the
8572     # old ones
8573     feedback_fn("Renaming original volumes...")
8574     rename_list = [(o, n.children[0].logical_id)
8575                    for (o, n) in zip(instance.disks, new_disks)]
8576     result = self.rpc.call_blockdev_rename(pnode, rename_list)
8577     result.Raise("Failed to rename original LVs")
8578
8579     feedback_fn("Initializing DRBD devices...")
8580     # all child devices are in place, we can now create the DRBD devices
8581     for disk in new_disks:
8582       for node in [pnode, snode]:
8583         f_create = node == pnode
8584         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8585
8586     # at this point, the instance has been modified
8587     instance.disk_template = constants.DT_DRBD8
8588     instance.disks = new_disks
8589     self.cfg.Update(instance, feedback_fn)
8590
8591     # disks are created, waiting for sync
8592     disk_abort = not _WaitForSync(self, instance)
8593     if disk_abort:
8594       raise errors.OpExecError("There are some degraded disks for"
8595                                " this instance, please cleanup manually")
8596
8597   def _ConvertDrbdToPlain(self, feedback_fn):
8598     """Converts an instance from drbd to plain.
8599
8600     """
8601     instance = self.instance
8602     assert len(instance.secondary_nodes) == 1
8603     pnode = instance.primary_node
8604     snode = instance.secondary_nodes[0]
8605     feedback_fn("Converting template to plain")
8606
8607     old_disks = instance.disks
8608     new_disks = [d.children[0] for d in old_disks]
8609
8610     # copy over size and mode
8611     for parent, child in zip(old_disks, new_disks):
8612       child.size = parent.size
8613       child.mode = parent.mode
8614
8615     # update instance structure
8616     instance.disks = new_disks
8617     instance.disk_template = constants.DT_PLAIN
8618     self.cfg.Update(instance, feedback_fn)
8619
8620     feedback_fn("Removing volumes on the secondary node...")
8621     for disk in old_disks:
8622       self.cfg.SetDiskID(disk, snode)
8623       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8624       if msg:
8625         self.LogWarning("Could not remove block device %s on node %s,"
8626                         " continuing anyway: %s", disk.iv_name, snode, msg)
8627
8628     feedback_fn("Removing unneeded volumes on the primary node...")
8629     for idx, disk in enumerate(old_disks):
8630       meta = disk.children[1]
8631       self.cfg.SetDiskID(meta, pnode)
8632       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8633       if msg:
8634         self.LogWarning("Could not remove metadata for disk %d on node %s,"
8635                         " continuing anyway: %s", idx, pnode, msg)
8636
8637
8638   def Exec(self, feedback_fn):
8639     """Modifies an instance.
8640
8641     All parameters take effect only at the next restart of the instance.
8642
8643     """
8644     # Process here the warnings from CheckPrereq, as we don't have a
8645     # feedback_fn there.
8646     for warn in self.warn:
8647       feedback_fn("WARNING: %s" % warn)
8648
8649     result = []
8650     instance = self.instance
8651     # disk changes
8652     for disk_op, disk_dict in self.op.disks:
8653       if disk_op == constants.DDM_REMOVE:
8654         # remove the last disk
8655         device = instance.disks.pop()
8656         device_idx = len(instance.disks)
8657         for node, disk in device.ComputeNodeTree(instance.primary_node):
8658           self.cfg.SetDiskID(disk, node)
8659           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8660           if msg:
8661             self.LogWarning("Could not remove disk/%d on node %s: %s,"
8662                             " continuing anyway", device_idx, node, msg)
8663         result.append(("disk/%d" % device_idx, "remove"))
8664       elif disk_op == constants.DDM_ADD:
8665         # add a new disk
8666         if instance.disk_template == constants.DT_FILE:
8667           file_driver, file_path = instance.disks[0].logical_id
8668           file_path = os.path.dirname(file_path)
8669         else:
8670           file_driver = file_path = None
8671         disk_idx_base = len(instance.disks)
8672         new_disk = _GenerateDiskTemplate(self,
8673                                          instance.disk_template,
8674                                          instance.name, instance.primary_node,
8675                                          instance.secondary_nodes,
8676                                          [disk_dict],
8677                                          file_path,
8678                                          file_driver,
8679                                          disk_idx_base)[0]
8680         instance.disks.append(new_disk)
8681         info = _GetInstanceInfoText(instance)
8682
8683         logging.info("Creating volume %s for instance %s",
8684                      new_disk.iv_name, instance.name)
8685         # Note: this needs to be kept in sync with _CreateDisks
8686         #HARDCODE
8687         for node in instance.all_nodes:
8688           f_create = node == instance.primary_node
8689           try:
8690             _CreateBlockDev(self, node, instance, new_disk,
8691                             f_create, info, f_create)
8692           except errors.OpExecError, err:
8693             self.LogWarning("Failed to create volume %s (%s) on"
8694                             " node %s: %s",
8695                             new_disk.iv_name, new_disk, node, err)
8696         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8697                        (new_disk.size, new_disk.mode)))
8698       else:
8699         # change a given disk
8700         instance.disks[disk_op].mode = disk_dict['mode']
8701         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8702
8703     if self.op.disk_template:
8704       r_shut = _ShutdownInstanceDisks(self, instance)
8705       if not r_shut:
8706         raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8707                                  " proceed with disk template conversion")
8708       mode = (instance.disk_template, self.op.disk_template)
8709       try:
8710         self._DISK_CONVERSIONS[mode](self, feedback_fn)
8711       except:
8712         self.cfg.ReleaseDRBDMinors(instance.name)
8713         raise
8714       result.append(("disk_template", self.op.disk_template))
8715
8716     # NIC changes
8717     for nic_op, nic_dict in self.op.nics:
8718       if nic_op == constants.DDM_REMOVE:
8719         # remove the last nic
8720         del instance.nics[-1]
8721         result.append(("nic.%d" % len(instance.nics), "remove"))
8722       elif nic_op == constants.DDM_ADD:
8723         # mac and bridge should be set, by now
8724         mac = nic_dict['mac']
8725         ip = nic_dict.get('ip', None)
8726         nicparams = self.nic_pinst[constants.DDM_ADD]
8727         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8728         instance.nics.append(new_nic)
8729         result.append(("nic.%d" % (len(instance.nics) - 1),
8730                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
8731                        (new_nic.mac, new_nic.ip,
8732                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8733                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8734                        )))
8735       else:
8736         for key in 'mac', 'ip':
8737           if key in nic_dict:
8738             setattr(instance.nics[nic_op], key, nic_dict[key])
8739         if nic_op in self.nic_pinst:
8740           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8741         for key, val in nic_dict.iteritems():
8742           result.append(("nic.%s/%d" % (key, nic_op), val))
8743
8744     # hvparams changes
8745     if self.op.hvparams:
8746       instance.hvparams = self.hv_inst
8747       for key, val in self.op.hvparams.iteritems():
8748         result.append(("hv/%s" % key, val))
8749
8750     # beparams changes
8751     if self.op.beparams:
8752       instance.beparams = self.be_inst
8753       for key, val in self.op.beparams.iteritems():
8754         result.append(("be/%s" % key, val))
8755
8756     # OS change
8757     if self.op.os_name:
8758       instance.os = self.op.os_name
8759
8760     self.cfg.Update(instance, feedback_fn)
8761
8762     return result
8763
  # Maps (current disk template, requested disk template) to the method
  # performing that conversion. Exec looks the method up in this table and
  # calls it with the LU instance passed explicitly as first argument.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
8768
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    """Declares shared locks on the queried nodes.

    If the opcode names no nodes, all nodes are locked.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    This only remembers the nodes whose locks have been acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; nodes which failed to answer are mapped to False

    """
    per_node = self.rpc.call_export_list(self.nodes)
    exports = {}
    for (node, node_res) in per_node.items():
      if node_res.fail_msg:
        exports[node] = False
      else:
        exports[node] = node_res.payload

    return exports
8809
8810
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The shutdown timeout is optional in the opcode; if it is missing,
    constants.DEFAULT_SHUTDOWN_TIMEOUT is used.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Locks the instance and all nodes.

    """
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    @rtype: tuple
    @return: (environment dict, node list, node list); the same node list
        is returned twice

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    @raise errors.OpPrereqError: if the instance has a file-based disk

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
    assert self.dst_node is not None

    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CreateSnapshots(self, feedback_fn):
    """Creates an LVM snapshot for every disk of the instance.

    @param feedback_fn: function used to send progress messages
    @return: List of snapshots as L{objects.Disk} instances; the entry of
        a disk which could not be snapshotted is False instead

    """
    instance = self.instance
    src_node = instance.primary_node

    vgname = self.cfg.GetVGName()

    snap_disks = []

    for idx, disk in enumerate(instance.disks):
      feedback_fn("Creating a snapshot of disk/%s on node %s" %
                  (idx, src_node))

      # result.payload will be a snapshot of an lvm leaf of the one we
      # passed
      result = self.rpc.call_blockdev_snapshot(src_node, disk)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                        idx, src_node, msg)
        # keep the list aligned with instance.disks
        snap_disks.append(False)
      else:
        disk_id = (vgname, result.payload)
        new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                               logical_id=disk_id, physical_id=disk_id,
                               iv_name=disk.iv_name)
        snap_disks.append(new_dev)

    return snap_disks

  def _RemoveSnapshot(self, feedback_fn, snap_disks, disk_index):
    """Removes an LVM snapshot.

    @param feedback_fn: function used to send progress messages
    @type snap_disks: list
    @param snap_disks: The list of all snapshots as returned by
                       L{_CreateSnapshots}
    @type disk_index: number
    @param disk_index: Index of the snapshot to be removed
    @rtype: bool
    @return: Whether removal was successful or not; False is also returned
        if there is no snapshot at the given index

    """
    disk = snap_disks[disk_index]
    if disk:
      src_node = self.instance.primary_node

      feedback_fn("Removing snapshot of disk/%s on node %s" %
                  (disk_index, src_node))

      result = self.rpc.call_blockdev_remove(src_node, disk)
      if not result.fail_msg:
        return True

      self.LogWarning("Could not remove snapshot for disk/%d from node"
                      " %s: %s", disk_index, src_node, result.fail_msg)

    return False

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    @param feedback_fn: function used to send progress messages

    """
    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          # nodes which cannot be queried are silently skipped
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    @param feedback_fn: function used to send progress messages
    @rtype: tuple
    @return: (fin_resu, dresults), where fin_resu tells whether finalizing
        the export succeeded and dresults is a list with one boolean per
        instance disk telling whether that disk was exported
    @raise errors.OpExecError: if the instance cannot be started again
        after the snapshots were taken

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      # per-disk results
      dresults = []
      removed_snaps = [False] * len(instance.disks)

      snap_disks = None
      try:
        try:
          snap_disks = self._CreateSnapshots(feedback_fn)
        finally:
          # restart the instance as soon as the snapshots exist (or their
          # creation failed), the export itself works on the snapshots
          if self.op.shutdown and instance.admin_up:
            feedback_fn("Starting instance %s" % instance.name)
            result = self.rpc.call_instance_start(src_node, instance,
                                                  None, None)
            msg = result.fail_msg
            if msg:
              _ShutdownInstanceDisks(self, instance)
              raise errors.OpExecError("Could not start instance: %s" % msg)

        assert len(snap_disks) == len(instance.disks)
        assert len(removed_snaps) == len(instance.disks)

        # TODO: check for size

        cluster_name = self.cfg.GetClusterName()
        for idx, dev in enumerate(snap_disks):
          feedback_fn("Exporting snapshot %s from %s to %s" %
                      (idx, src_node, dst_node.name))
          if dev:
            # FIXME: pass debug from opcode to backend
            result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                   instance, cluster_name,
                                                   idx, self.op.debug_level)
            msg = result.fail_msg
            if msg:
              self.LogWarning("Could not export disk/%s from node %s to"
                              " node %s: %s", idx, src_node, dst_node.name, msg)
              dresults.append(False)
            else:
              dresults.append(True)

            # Remove snapshot
            if self._RemoveSnapshot(feedback_fn, snap_disks, idx):
              removed_snaps[idx] = True
          else:
            # no snapshot could be created for this disk
            dresults.append(False)

        assert len(dresults) == len(instance.disks)

        # Check for backwards compatibility
        assert compat.all(isinstance(i, bool) for i in dresults), \
               "Not all results are boolean: %r" % dresults

        feedback_fn("Finalizing export on %s" % dst_node.name)
        result = self.rpc.call_finalize_export(dst_node.name, instance,
                                               snap_disks)
        msg = result.fail_msg
        fin_resu = not msg
        if msg:
          self.LogWarning("Could not finalize export for instance %s"
                          " on node %s: %s", instance.name, dst_node.name, msg)

      finally:
        # Remove all snapshots which are still around. snap_disks is still
        # None if the snapshot creation itself raised; there is nothing to
        # remove then, and indexing None here would replace the original
        # exception with a TypeError.
        assert len(removed_snaps) == len(instance.disks)
        if snap_disks is not None:
          for idx, removed in enumerate(removed_snaps):
            if not removed:
              self._RemoveSnapshot(feedback_fn, snap_disks, idx)

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    self._CleanupExports(feedback_fn)

    return fin_resu, dresults
9077
9078
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Locks all nodes.

    """
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Remove any export.

    All locked nodes are asked for their exports and the export of the
    instance is removed from every node which has one.

    @param feedback_fn: function used to send feedback to the caller

    """
    export_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = not export_name
    if fqdn_warn:
      export_name = self.op.instance_name

    nodes = self.acquired_locks[locking.LEVEL_NODE]
    found = False
    for (node, node_res) in self.rpc.call_export_list(nodes).items():
      query_err = node_res.fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_err)
        continue
      if export_name not in node_res.payload:
        continue
      found = True
      remove_err = self.rpc.call_export_remove(node, export_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", export_name, node, remove_err)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
9130
9131
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    """Expands the object name and locks the tagged object.

    Nodes and instances get their name expanded and are locked; for any
    other kind no lock is declared.

    """
    self.needed_locks = {}
    kind = self.op.kind
    if kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    This looks up the object the tags belong to and stores it in
    self.target.

    @raise errors.OpPrereqError: if the tag kind is not one of cluster,
        node or instance

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
    self.target = target
9161
9162
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @rtype: list
    @return: the tags of the target object

    """
    tags = self.target.GetTags()
    return list(tags)
9175
9176
9177 class LUSearchTags(NoHooksLU):
9178   """Searches the tags for a given pattern.
9179
9180   """
9181   _OP_REQP = ["pattern"]
9182   REQ_BGL = False
9183
9184   def ExpandNames(self):
9185     self.needed_locks = {}
9186
9187   def CheckPrereq(self):
9188     """Check prerequisites.
9189
9190     This checks the pattern passed for validity by compiling it.
9191
9192     """
9193     try:
9194       self.re = re.compile(self.op.pattern)
9195     except re.error, err:
9196       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9197                                  (self.op.pattern, err), errors.ECODE_INVAL)
9198
9199   def Exec(self, feedback_fn):
9200     """Returns the tag list.
9201
9202     """
9203     cfg = self.cfg
9204     tgts = [("/cluster", cfg.GetClusterInfo())]
9205     ilist = cfg.GetAllInstancesInfo().values()
9206     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9207     nlist = cfg.GetAllNodesInfo().values()
9208     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9209     results = []
9210     for path, target in tgts:
9211       for tag in target.GetTags():
9212         if self.re.search(tag):
9213           results.append((path, tag))
9214     return results
9215
9216
9217 class LUAddTags(TagsLU):
9218   """Sets a tag on a given object.
9219
9220   """
9221   _OP_REQP = ["kind", "name", "tags"]
9222   REQ_BGL = False
9223
9224   def CheckPrereq(self):
9225     """Check prerequisites.
9226
9227     This checks the type and length of the tag name and value.
9228
9229     """
9230     TagsLU.CheckPrereq(self)
9231     for tag in self.op.tags:
9232       objects.TaggableObject.ValidateTag(tag)
9233
9234   def Exec(self, feedback_fn):
9235     """Sets the tag.
9236
9237     """
9238     try:
9239       for tag in self.op.tags:
9240         self.target.AddTag(tag)
9241     except errors.TagError, err:
9242       raise errors.OpExecError("Error while setting tag: %s" % str(err))
9243     self.cfg.Update(self.target, feedback_fn)
9244
9245
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    @raise errors.OpPrereqError: if any of the tags to be deleted is not
        set on the target object

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    wanted = frozenset(self.op.tags)
    present = self.target.GetTags()
    if wanted <= present:
      return

    # report the missing tags in a stable (sorted) order
    missing = sorted("'%s'" % tag for tag in wanted - present)
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (",".join(missing)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    All tags are removed from the target, which is then written to the
    configuration.

    """
    target = self.target
    for tag in self.op.tags:
      target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
9278
9279
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return
    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
    self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    @raise errors.OpExecError: if the delay on the master fails

    """
    if self.op.on_master and not utils.TestDelay(self.op.duration):
      raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      per_node = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for (node, node_res) in per_node.items():
        node_res.Raise("Failure during rpc call to node %s" % node)
9320
9321
9322 class IAllocator(object):
9323   """IAllocator framework.
9324
  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the key list matching the mode, i.e.
      one of the _ALLO_KEYS, _RELO_KEYS or _EVAC_KEYS class attributes,
      are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage
9333
9334   """
9335   # pylint: disable-msg=R0902
9336   # lots of instance attributes
9337   _ALLO_KEYS = [
9338     "name", "mem_size", "disks", "disk_template",
9339     "os", "tags", "nics", "vcpus", "hypervisor",
9340     ]
9341   _RELO_KEYS = [
9342     "name", "relocate_from",
9343     ]
9344   _EVAC_KEYS = [
9345     "evac_nodes",
9346     ]
9347
9348   def __init__(self, cfg, rpc, mode, **kwargs):
9349     self.cfg = cfg
9350     self.rpc = rpc
9351     # init buffer variables
9352     self.in_text = self.out_text = self.in_data = self.out_data = None
9353     # init all input fields so that pylint is happy
9354     self.mode = mode
9355     self.mem_size = self.disks = self.disk_template = None
9356     self.os = self.tags = self.nics = self.vcpus = None
9357     self.hypervisor = None
9358     self.relocate_from = None
9359     self.name = None
9360     self.evac_nodes = None
9361     # computed fields
9362     self.required_nodes = None
9363     # init result fields
9364     self.success = self.info = self.result = None
9365     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9366       keyset = self._ALLO_KEYS
9367       fn = self._AddNewInstance
9368     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9369       keyset = self._RELO_KEYS
9370       fn = self._AddRelocateInstance
9371     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9372       keyset = self._EVAC_KEYS
9373       fn = self._AddEvacuateNodes
9374     else:
9375       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9376                                    " IAllocator" % self.mode)
9377     for key in kwargs:
9378       if key not in keyset:
9379         raise errors.ProgrammerError("Invalid input parameter '%s' to"
9380                                      " IAllocator" % key)
9381       setattr(self, key, kwargs[key])
9382
9383     for key in keyset:
9384       if key not in kwargs:
9385         raise errors.ProgrammerError("Missing input parameter '%s' to"
9386                                      " IAllocator" % key)
9387     self._BuildInputData(fn)
9388
9389   def _ComputeClusterData(self):
9390     """Compute the generic allocator input data.
9391
9392     This is the data that is independent of the actual operation.
9393
9394     """
9395     cfg = self.cfg
9396     cluster_info = cfg.GetClusterInfo()
9397     # cluster data
9398     data = {
9399       "version": constants.IALLOCATOR_VERSION,
9400       "cluster_name": cfg.GetClusterName(),
9401       "cluster_tags": list(cluster_info.GetTags()),
9402       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9403       # we don't have job IDs
9404       }
9405     iinfo = cfg.GetAllInstancesInfo().values()
9406     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9407
9408     # node data
9409     node_results = {}
9410     node_list = cfg.GetNodeList()
9411
9412     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9413       hypervisor_name = self.hypervisor
9414     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9415       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9416     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9417       hypervisor_name = cluster_info.enabled_hypervisors[0]
9418
9419     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9420                                         hypervisor_name)
9421     node_iinfo = \
9422       self.rpc.call_all_instances_info(node_list,
9423                                        cluster_info.enabled_hypervisors)
9424     for nname, nresult in node_data.items():
9425       # first fill in static (config-based) values
9426       ninfo = cfg.GetNodeInfo(nname)
9427       pnr = {
9428         "tags": list(ninfo.GetTags()),
9429         "primary_ip": ninfo.primary_ip,
9430         "secondary_ip": ninfo.secondary_ip,
9431         "offline": ninfo.offline,
9432         "drained": ninfo.drained,
9433         "master_candidate": ninfo.master_candidate,
9434         }
9435
9436       if not (ninfo.offline or ninfo.drained):
9437         nresult.Raise("Can't get data for node %s" % nname)
9438         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9439                                 nname)
9440         remote_info = nresult.payload
9441
9442         for attr in ['memory_total', 'memory_free', 'memory_dom0',
9443                      'vg_size', 'vg_free', 'cpu_total']:
9444           if attr not in remote_info:
9445             raise errors.OpExecError("Node '%s' didn't return attribute"
9446                                      " '%s'" % (nname, attr))
9447           if not isinstance(remote_info[attr], int):
9448             raise errors.OpExecError("Node '%s' returned invalid value"
9449                                      " for '%s': %s" %
9450                                      (nname, attr, remote_info[attr]))
9451         # compute memory used by primary instances
9452         i_p_mem = i_p_up_mem = 0
9453         for iinfo, beinfo in i_list:
9454           if iinfo.primary_node == nname:
9455             i_p_mem += beinfo[constants.BE_MEMORY]
9456             if iinfo.name not in node_iinfo[nname].payload:
9457               i_used_mem = 0
9458             else:
9459               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9460             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9461             remote_info['memory_free'] -= max(0, i_mem_diff)
9462
9463             if iinfo.admin_up:
9464               i_p_up_mem += beinfo[constants.BE_MEMORY]
9465
9466         # compute memory used by instances
9467         pnr_dyn = {
9468           "total_memory": remote_info['memory_total'],
9469           "reserved_memory": remote_info['memory_dom0'],
9470           "free_memory": remote_info['memory_free'],
9471           "total_disk": remote_info['vg_size'],
9472           "free_disk": remote_info['vg_free'],
9473           "total_cpus": remote_info['cpu_total'],
9474           "i_pri_memory": i_p_mem,
9475           "i_pri_up_memory": i_p_up_mem,
9476           }
9477         pnr.update(pnr_dyn)
9478
9479       node_results[nname] = pnr
9480     data["nodes"] = node_results
9481
9482     # instance data
9483     instance_data = {}
9484     for iinfo, beinfo in i_list:
9485       nic_data = []
9486       for nic in iinfo.nics:
9487         filled_params = objects.FillDict(
9488             cluster_info.nicparams[constants.PP_DEFAULT],
9489             nic.nicparams)
9490         nic_dict = {"mac": nic.mac,
9491                     "ip": nic.ip,
9492                     "mode": filled_params[constants.NIC_MODE],
9493                     "link": filled_params[constants.NIC_LINK],
9494                    }
9495         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9496           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9497         nic_data.append(nic_dict)
9498       pir = {
9499         "tags": list(iinfo.GetTags()),
9500         "admin_up": iinfo.admin_up,
9501         "vcpus": beinfo[constants.BE_VCPUS],
9502         "memory": beinfo[constants.BE_MEMORY],
9503         "os": iinfo.os,
9504         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9505         "nics": nic_data,
9506         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9507         "disk_template": iinfo.disk_template,
9508         "hypervisor": iinfo.hypervisor,
9509         }
9510       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9511                                                  pir["disks"])
9512       instance_data[iinfo.name] = pir
9513
9514     data["instances"] = instance_data
9515
9516     self.in_data = data
9517
9518   def _AddNewInstance(self):
9519     """Add new instance data to allocator structure.
9520
9521     This in combination with _AllocatorGetClusterData will create the
9522     correct structure needed as input for the allocator.
9523
9524     The checks for the completeness of the opcode must have already been
9525     done.
9526
9527     """
9528     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9529
9530     if self.disk_template in constants.DTS_NET_MIRROR:
9531       self.required_nodes = 2
9532     else:
9533       self.required_nodes = 1
9534     request = {
9535       "name": self.name,
9536       "disk_template": self.disk_template,
9537       "tags": self.tags,
9538       "os": self.os,
9539       "vcpus": self.vcpus,
9540       "memory": self.mem_size,
9541       "disks": self.disks,
9542       "disk_space_total": disk_space,
9543       "nics": self.nics,
9544       "required_nodes": self.required_nodes,
9545       }
9546     return request
9547
9548   def _AddRelocateInstance(self):
9549     """Add relocate instance data to allocator structure.
9550
9551     This in combination with _IAllocatorGetClusterData will create the
9552     correct structure needed as input for the allocator.
9553
9554     The checks for the completeness of the opcode must have already been
9555     done.
9556
9557     """
9558     instance = self.cfg.GetInstanceInfo(self.name)
9559     if instance is None:
9560       raise errors.ProgrammerError("Unknown instance '%s' passed to"
9561                                    " IAllocator" % self.name)
9562
9563     if instance.disk_template not in constants.DTS_NET_MIRROR:
9564       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9565                                  errors.ECODE_INVAL)
9566
9567     if len(instance.secondary_nodes) != 1:
9568       raise errors.OpPrereqError("Instance has not exactly one secondary node",
9569                                  errors.ECODE_STATE)
9570
9571     self.required_nodes = 1
9572     disk_sizes = [{'size': disk.size} for disk in instance.disks]
9573     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9574
9575     request = {
9576       "name": self.name,
9577       "disk_space_total": disk_space,
9578       "required_nodes": self.required_nodes,
9579       "relocate_from": self.relocate_from,
9580       }
9581     return request
9582
9583   def _AddEvacuateNodes(self):
9584     """Add evacuate nodes data to allocator structure.
9585
9586     """
9587     request = {
9588       "evac_nodes": self.evac_nodes
9589       }
9590     return request
9591
9592   def _BuildInputData(self, fn):
9593     """Build input data structures.
9594
9595     """
9596     self._ComputeClusterData()
9597
9598     request = fn()
9599     request["type"] = self.mode
9600     self.in_data["request"] = request
9601
9602     self.in_text = serializer.Dump(self.in_data)
9603
9604   def Run(self, name, validate=True, call_fn=None):
9605     """Run an instance allocator and return the results.
9606
9607     """
9608     if call_fn is None:
9609       call_fn = self.rpc.call_iallocator_runner
9610
9611     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9612     result.Raise("Failure while running the iallocator script")
9613
9614     self.out_text = result.payload
9615     if validate:
9616       self._ValidateResult()
9617
9618   def _ValidateResult(self):
9619     """Process the allocator results.
9620
9621     This will process and if successful save the result in
9622     self.out_data and the other parameters.
9623
9624     """
9625     try:
9626       rdict = serializer.Load(self.out_text)
9627     except Exception, err:
9628       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9629
9630     if not isinstance(rdict, dict):
9631       raise errors.OpExecError("Can't parse iallocator results: not a dict")
9632
9633     # TODO: remove backwards compatiblity in later versions
9634     if "nodes" in rdict and "result" not in rdict:
9635       rdict["result"] = rdict["nodes"]
9636       del rdict["nodes"]
9637
9638     for key in "success", "info", "result":
9639       if key not in rdict:
9640         raise errors.OpExecError("Can't parse iallocator results:"
9641                                  " missing key '%s'" % key)
9642       setattr(self, key, rdict[key])
9643
9644     if not isinstance(rdict["result"], list):
9645       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9646                                " is not a list")
9647     self.out_data = rdict
9648
9649
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  Depending on the requested direction, this LU either returns the
  input text prepared for an instance allocator
  (constants.IALLOCATOR_DIR_IN) or runs the named allocator and returns
  its raw, unvalidated output (constants.IALLOCATOR_DIR_OUT).

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and
    mode of the test:
      - allocate mode: all instance parameters must be present, the
        instance name must not exist in the cluster yet, and the 'nics'
        and 'disks' lists must be well-formed; a missing hypervisor is
        replaced by the cluster's hypervisor type
      - relocate mode: the instance name is expanded and the instance's
        secondary nodes are remembered in self.relocate_from
      - multi-evacuate mode: 'evac_nodes' must be present

    For the "out" direction an allocator name is required as well.

    Raises errors.OpPrereqError if a parameter is missing or invalid.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # the test allocates a new instance, so the name must be unused
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      # every NIC must be a dict with at least mac, ip and bridge
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # every disk must be a dict with an integer size and a mode of
      # either 'r' or 'w'
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      # running the allocator requires knowing which one to run
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    An IAllocator object is created for the requested mode.  For the
    "in" direction its input text is returned; otherwise the allocator
    named by the opcode is run without validating its output and the
    raw output text is returned.

    The feedback_fn argument is not used by this method.

    Raises errors.ProgrammerError if the mode is not one of the modes
    accepted by CheckPrereq.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      # the mode must be interpolated into the message; passing it as a
      # second exception argument would leave a literal "%s" in the text
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      # NOTE(review): in_text is presumably filled in by the IAllocator
      # constructor, which is not visible here - confirm
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result