code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 import os
  30 import os.path
  31 import time
  32 import re
  33 import platform
  34 import logging
  35 import copy
  36 import OpenSSL
  37
  38 from ganeti import ssh
  39 from ganeti import utils
  40 from ganeti import errors
  41 from ganeti import hypervisor
  42 from ganeti import locking
  43 from ganeti import constants
  44 from ganeti import objects
  45 from ganeti import serializer
  46 from ganeti import ssconf
  47 from ganeti import uidpool
  48 from ganeti import compat
  49
  50
  51 class LogicalUnit(object):
  52   """Logical Unit base class.
  53
  54   Subclasses must follow these rules:
  55     - implement ExpandNames
  56     - implement CheckPrereq (except when tasklets are used)
  57     - implement Exec (except when tasklets are used)
  58     - implement BuildHooksEnv
  59     - redefine HPATH and HTYPE
  60     - optionally redefine their run requirements:
  61         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  62
  63   Note that all commands require root permissions.
  64
  65   @ivar dry_run_result: the value (if any) that will be returned to the caller
  66       in dry-run mode (signalled by opcode dry_run parameter)
  67
  68   """
  69   HPATH = None
  70   HTYPE = None
  71   _OP_REQP = []
  72   REQ_BGL = True
  73
  74   def __init__(self, processor, op, context, rpc):
  75     """Constructor for LogicalUnit.
  76
  77     This needs to be overridden in derived classes in order to check op
  78     validity.
  79
  80     """
  81     self.proc = processor
  82     self.op = op
  83     self.cfg = context.cfg
  84     self.context = context
  85     self.rpc = rpc
  86     # Dicts used to declare locking needs to mcpu
  87     self.needed_locks = None
  88     self.acquired_locks = {}
  89     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
  90     self.add_locks = {}
  91     self.remove_locks = {}
  92     # Used to force good behavior when calling helper functions
  93     self.recalculate_locks = {}
  94     self.__ssh = None
  95     # logging
  96     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
  97     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
  98     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
  99     # support for dry-run
 100     self.dry_run_result = None
 101     # support for generic debug attribute
 102     if (not hasattr(self.op, "debug_level") or
 103         not isinstance(self.op.debug_level, int)):
 104       self.op.debug_level = 0
 105
 106     # Tasklets
 107     self.tasklets = None
 108
 109     for attr_name in self._OP_REQP:
 110       attr_val = getattr(op, attr_name, None)
 111       if attr_val is None:
 112         raise errors.OpPrereqError("Required parameter '%s' missing" %
 113                                    attr_name, errors.ECODE_INVAL)
 114
 115     self.CheckArguments()
 116
 117   def __GetSSH(self):
 118     """Returns the SshRunner object
 119
 120     """
 121     if not self.__ssh:
 122       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 123     return self.__ssh
 124
 125   ssh = property(fget=__GetSSH)
 126
 127   def CheckArguments(self):
 128     """Check syntactic validity for the opcode arguments.
 129
 130     This method is for doing a simple syntactic check and ensure
 131     validity of opcode parameters, without any cluster-related
 132     checks. While the same can be accomplished in ExpandNames and/or
 133     CheckPrereq, doing these separate is better because:
 134
 135       - ExpandNames is left as as purely a lock-related function
 136       - CheckPrereq is run after we have acquired locks (and possible
 137         waited for them)
 138
 139     The function is allowed to change the self.op attribute so that
 140     later methods can no longer worry about missing parameters.
 141
 142     """
 143     pass
 144
 145   def ExpandNames(self):
 146     """Expand names for this LU.
 147
 148     This method is called before starting to execute the opcode, and it should
 149     update all the parameters of the opcode to their canonical form (e.g. a
 150     short node name must be fully expanded after this method has successfully
 151     completed). This way locking, hooks, logging, ecc. can work correctly.
 152
 153     LUs which implement this method must also populate the self.needed_locks
 154     member, as a dict with lock levels as keys, and a list of needed lock names
 155     as values. Rules:
 156
 157       - use an empty dict if you don't need any lock
 158       - if you don't need any lock at a particular level omit that level
 159       - don't put anything for the BGL level
 160       - if you want all locks at a level use locking.ALL_SET as a value
 161
 162     If you need to share locks (rather than acquire them exclusively) at one
 163     level you can modify self.share_locks, setting a true value (usually 1) for
 164     that level. By default locks are not shared.
 165
 166     This function can also define a list of tasklets, which then will be
 167     executed in order instead of the usual LU-level CheckPrereq and Exec
 168     functions, if those are not defined by the LU.
 169
 170     Examples::
 171
 172       # Acquire all nodes and one instance
 173       self.needed_locks = {
 174         locking.LEVEL_NODE: locking.ALL_SET,
 175         locking.LEVEL_INSTANCE: ['instance1.example.tld'],
 176       }
 177       # Acquire just two nodes
 178       self.needed_locks = {
 179         locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
 180       }
 181       # Acquire no locks
 182       self.needed_locks = {} # No, you can't leave it to the default value None
 183
 184     """
 185     # The implementation of this method is mandatory only if the new LU is
 186     # concurrent, so that old LUs don't need to be changed all at the same
 187     # time.
 188     if self.REQ_BGL:
 189       self.needed_locks = {} # Exclusive LUs don't need locks.
 190     else:
 191       raise NotImplementedError
 192
 193   def DeclareLocks(self, level):
 194     """Declare LU locking needs for a level
 195
 196     While most LUs can just declare their locking needs at ExpandNames time,
 197     sometimes there's the need to calculate some locks after having acquired
 198     the ones before. This function is called just before acquiring locks at a
 199     particular level, but after acquiring the ones at lower levels, and permits
 200     such calculations. It can be used to modify self.needed_locks, and by
 201     default it does nothing.
 202
 203     This function is only called if you have something already set in
 204     self.needed_locks for the level.
 205
 206     @param level: Locking level which is going to be locked
 207     @type level: member of ganeti.locking.LEVELS
 208
 209     """
 210
 211   def CheckPrereq(self):
 212     """Check prerequisites for this LU.
 213
 214     This method should check that the prerequisites for the execution
 215     of this LU are fulfilled. It can do internode communication, but
 216     it should be idempotent - no cluster or system changes are
 217     allowed.
 218
 219     The method should raise errors.OpPrereqError in case something is
 220     not fulfilled. Its return value is ignored.
 221
 222     This method should also update all the parameters of the opcode to
 223     their canonical form if it hasn't been done by ExpandNames before.
 224
 225     """
 226     if self.tasklets is not None:
 227       for (idx, tl) in enumerate(self.tasklets):
 228         logging.debug("Checking prerequisites for tasklet %s/%s",
 229                       idx + 1, len(self.tasklets))
 230         tl.CheckPrereq()
 231     else:
 232       raise NotImplementedError
 233
 234   def Exec(self, feedback_fn):
 235     """Execute the LU.
 236
 237     This method should implement the actual work. It should raise
 238     errors.OpExecError for failures that are somewhat dealt with in
 239     code, or expected.
 240
 241     """
 242     if self.tasklets is not None:
 243       for (idx, tl) in enumerate(self.tasklets):
 244         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 245         tl.Exec(feedback_fn)
 246     else:
 247       raise NotImplementedError
 248
 249   def BuildHooksEnv(self):
 250     """Build hooks environment for this LU.
 251
 252     This method should return a three-node tuple consisting of: a dict
 253     containing the environment that will be used for running the
 254     specific hook for this LU, a list of node names on which the hook
 255     should run before the execution, and a list of node names on which
 256     the hook should run after the execution.
 257
 258     The keys of the dict must not have 'GANETI_' prefixed as this will
 259     be handled in the hooks runner. Also note additional keys will be
 260     added by the hooks runner. If the LU doesn't define any
 261     environment, an empty dict (and not None) should be returned.
 262
 263     No nodes should be returned as an empty list (and not None).
 264
 265     Note that if the HPATH for a LU class is None, this function will
 266     not be called.
 267
 268     """
 269     raise NotImplementedError
 270
 271   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 272     """Notify the LU about the results of its hooks.
 273
 274     This method is called every time a hooks phase is executed, and notifies
 275     the Logical Unit about the hooks' result. The LU can then use it to alter
 276     its result based on the hooks.  By default the method does nothing and the
 277     previous result is passed back unchanged but any LU can define it if it
 278     wants to use the local cluster hook-scripts somehow.
 279
 280     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 281         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 282     @param hook_results: the results of the multi-node hooks rpc call
 283     @param feedback_fn: function used send feedback back to the caller
 284     @param lu_result: the previous Exec result this LU had, or None
 285         in the PRE phase
 286     @return: the new Exec result, based on the previous result
 287         and hook results
 288
 289     """
 290     # API must be kept, thus we ignore the unused argument and could
 291     # be a function warnings
 292     # pylint: disable-msg=W0613,R0201
 293     return lu_result
 294
 295   def _ExpandAndLockInstance(self):
 296     """Helper function to expand and lock an instance.
 297
 298     Many LUs that work on an instance take its name in self.op.instance_name
 299     and need to expand it and then declare the expanded name for locking. This
 300     function does it, and then updates self.op.instance_name to the expanded
 301     name. It also initializes needed_locks as a dict, if this hasn't been done
 302     before.
 303
 304     """
 305     if self.needed_locks is None:
 306       self.needed_locks = {}
 307     else:
 308       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 309         "_ExpandAndLockInstance called with instance-level locks set"
 310     self.op.instance_name = _ExpandInstanceName(self.cfg,
 311                                                 self.op.instance_name)
 312     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 313
 314   def _LockInstancesNodes(self, primary_only=False):
 315     """Helper function to declare instances' nodes for locking.
 316
 317     This function should be called after locking one or more instances to lock
 318     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 319     with all primary or secondary nodes for instances already locked and
 320     present in self.needed_locks[locking.LEVEL_INSTANCE].
 321
 322     It should be called from DeclareLocks, and for safety only works if
 323     self.recalculate_locks[locking.LEVEL_NODE] is set.
 324
 325     In the future it may grow parameters to just lock some instance's nodes, or
 326     to just lock primaries or secondary nodes, if needed.
 327
 328     If should be called in DeclareLocks in a way similar to::
 329
 330       if level == locking.LEVEL_NODE:
 331         self._LockInstancesNodes()
 332
 333     @type primary_only: boolean
 334     @param primary_only: only lock primary nodes of locked instances
 335
 336     """
 337     assert locking.LEVEL_NODE in self.recalculate_locks, \
 338       "_LockInstancesNodes helper function called with no nodes to recalculate"
 339
 340     # TODO: check if we're really been called with the instance locks held
 341
 342     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 343     # future we might want to have different behaviors depending on the value
 344     # of self.recalculate_locks[locking.LEVEL_NODE]
 345     wanted_nodes = []
 346     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 347       instance = self.context.cfg.GetInstanceInfo(instance_name)
 348       wanted_nodes.append(instance.primary_node)
 349       if not primary_only:
 350         wanted_nodes.extend(instance.secondary_nodes)
 351
 352     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 353       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 354     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 355       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 356
 357     del self.recalculate_locks[locking.LEVEL_NODE]
 358
 359
 360 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 361   """Simple LU which runs no hooks.
 362
 363   This LU is intended as a parent for other LogicalUnits which will
 364   run no hooks, in order to reduce duplicate code.
 365
 366   """
 367   HPATH = None
 368   HTYPE = None
 369
 370   def BuildHooksEnv(self):
 371     """Empty BuildHooksEnv for NoHooksLu.
 372
 373     This just raises an error.
 374
 375     """
 376     assert False, "BuildHooksEnv called for NoHooksLUs"
 377
 378
 379 class Tasklet:
 380   """Tasklet base class.
 381
 382   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 383   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 384   tasklets know nothing about locks.
 385
 386   Subclasses must follow these rules:
 387     - Implement CheckPrereq
 388     - Implement Exec
 389
 390   """
 391   def __init__(self, lu):
 392     self.lu = lu
 393
 394     # Shortcuts
 395     self.cfg = lu.cfg
 396     self.rpc = lu.rpc
 397
 398   def CheckPrereq(self):
 399     """Check prerequisites for this tasklets.
 400
 401     This method should check whether the prerequisites for the execution of
 402     this tasklet are fulfilled. It can do internode communication, but it
 403     should be idempotent - no cluster or system changes are allowed.
 404
 405     The method should raise errors.OpPrereqError in case something is not
 406     fulfilled. Its return value is ignored.
 407
 408     This method should also update all parameters to their canonical form if it
 409     hasn't been done before.
 410
 411     """
 412     raise NotImplementedError
 413
 414   def Exec(self, feedback_fn):
 415     """Execute the tasklet.
 416
 417     This method should implement the actual work. It should raise
 418     errors.OpExecError for failures that are somewhat dealt with in code, or
 419     expected.
 420
 421     """
 422     raise NotImplementedError
 423
 424
 425 def _GetWantedNodes(lu, nodes):
 426   """Returns list of checked and expanded node names.
 427
 428   @type lu: L{LogicalUnit}
 429   @param lu: the logical unit on whose behalf we execute
 430   @type nodes: list
 431   @param nodes: list of node names or None for all nodes
 432   @rtype: list
 433   @return: the list of nodes, sorted
 434   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 435
 436   """
 437   if not isinstance(nodes, list):
 438     raise errors.OpPrereqError("Invalid argument type 'nodes'",
 439                                errors.ECODE_INVAL)
 440
 441   if not nodes:
 442     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 443       " non-empty list of nodes whose name is to be expanded.")
 444
 445   wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
 446   return utils.NiceSort(wanted)
 447
 448
 449 def _GetWantedInstances(lu, instances):
 450   """Returns list of checked and expanded instance names.
 451
 452   @type lu: L{LogicalUnit}
 453   @param lu: the logical unit on whose behalf we execute
 454   @type instances: list
 455   @param instances: list of instance names or None for all instances
 456   @rtype: list
 457   @return: the list of instances, sorted
 458   @raise errors.OpPrereqError: if the instances parameter is wrong type
 459   @raise errors.OpPrereqError: if any of the passed instances is not found
 460
 461   """
 462   if not isinstance(instances, list):
 463     raise errors.OpPrereqError("Invalid argument type 'instances'",
 464                                errors.ECODE_INVAL)
 465
 466   if instances:
 467     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 468   else:
 469     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 470   return wanted
 471
 472
 473 def _CheckOutputFields(static, dynamic, selected):
 474   """Checks whether all selected fields are valid.
 475
 476   @type static: L{utils.FieldSet}
 477   @param static: static fields set
 478   @type dynamic: L{utils.FieldSet}
 479   @param dynamic: dynamic fields set
 480
 481   """
 482   f = utils.FieldSet()
 483   f.Extend(static)
 484   f.Extend(dynamic)
 485
 486   delta = f.NonMatching(selected)
 487   if delta:
 488     raise errors.OpPrereqError("Unknown output fields selected: %s"
 489                                % ",".join(delta), errors.ECODE_INVAL)
 490
 491
 492 def _CheckBooleanOpField(op, name):
 493   """Validates boolean opcode parameters.
 494
 495   This will ensure that an opcode parameter is either a boolean value,
 496   or None (but that it always exists).
 497
 498   """
 499   val = getattr(op, name, None)
 500   if not (val is None or isinstance(val, bool)):
 501     raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
 502                                (name, str(val)), errors.ECODE_INVAL)
 503   setattr(op, name, val)
 504
 505
 506 def _CheckGlobalHvParams(params):
 507   """Validates that given hypervisor params are not global ones.
 508
 509   This will ensure that instances don't get customised versions of
 510   global params.
 511
 512   """
 513   used_globals = constants.HVC_GLOBALS.intersection(params)
 514   if used_globals:
 515     msg = ("The following hypervisor parameters are global and cannot"
 516            " be customized at instance level, please modify them at"
 517            " cluster level: %s" % utils.CommaJoin(used_globals))
 518     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 519
 520
 521 def _CheckNodeOnline(lu, node):
 522   """Ensure that a given node is online.
 523
 524   @param lu: the LU on behalf of which we make the check
 525   @param node: the node to check
 526   @raise errors.OpPrereqError: if the node is offline
 527
 528   """
 529   if lu.cfg.GetNodeInfo(node).offline:
 530     raise errors.OpPrereqError("Can't use offline node %s" % node,
 531                                errors.ECODE_INVAL)
 532
 533
 534 def _CheckNodeNotDrained(lu, node):
 535   """Ensure that a given node is not drained.
 536
 537   @param lu: the LU on behalf of which we make the check
 538   @param node: the node to check
 539   @raise errors.OpPrereqError: if the node is drained
 540
 541   """
 542   if lu.cfg.GetNodeInfo(node).drained:
 543     raise errors.OpPrereqError("Can't use drained node %s" % node,
 544                                errors.ECODE_INVAL)
 545
 546
 547 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 548   """Ensure that a node supports a given OS.
 549
 550   @param lu: the LU on behalf of which we make the check
 551   @param node: the node to check
 552   @param os_name: the OS to query about
 553   @param force_variant: whether to ignore variant errors
 554   @raise errors.OpPrereqError: if the node is not supporting the OS
 555
 556   """
 557   result = lu.rpc.call_os_get(node, os_name)
 558   result.Raise("OS '%s' not in supported OS list for node %s" %
 559                (os_name, node),
 560                prereq=True, ecode=errors.ECODE_INVAL)
 561   if not force_variant:
 562     _CheckOSVariant(result.payload, os_name)
 563
 564
 565 def _RequireFileStorage():
 566   """Checks that file storage is enabled.
 567
 568   @raise errors.OpPrereqError: when file storage is disabled
 569
 570   """
 571   if not constants.ENABLE_FILE_STORAGE:
 572     raise errors.OpPrereqError("File storage disabled at configure time",
 573                                errors.ECODE_INVAL)
 574
 575
 576 def _CheckDiskTemplate(template):
 577   """Ensure a given disk template is valid.
 578
 579   """
 580   if template not in constants.DISK_TEMPLATES:
 581     msg = ("Invalid disk template name '%s', valid templates are: %s" %
 582            (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
 583     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 584   if template == constants.DT_FILE:
 585     _RequireFileStorage()
 586
 587
 588 def _CheckStorageType(storage_type):
 589   """Ensure a given storage type is valid.
 590
 591   """
 592   if storage_type not in constants.VALID_STORAGE_TYPES:
 593     raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
 594                                errors.ECODE_INVAL)
 595   if storage_type == constants.ST_FILE:
 596     _RequireFileStorage()
 597
 598
 599
 600 def _CheckInstanceDown(lu, instance, reason):
 601   """Ensure that an instance is not running."""
 602   if instance.admin_up:
 603     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 604                                (instance.name, reason), errors.ECODE_STATE)
 605
 606   pnode = instance.primary_node
 607   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 608   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 609               prereq=True, ecode=errors.ECODE_ENVIRON)
 610
 611   if instance.name in ins_l.payload:
 612     raise errors.OpPrereqError("Instance %s is running, %s" %
 613                                (instance.name, reason), errors.ECODE_STATE)
 614
 615
 616 def _ExpandItemName(fn, name, kind):
 617   """Expand an item name.
 618
 619   @param fn: the function to use for expansion
 620   @param name: requested item name
 621   @param kind: text description ('Node' or 'Instance')
 622   @return: the resolved (full) name
 623   @raise errors.OpPrereqError: if the item is not found
 624
 625   """
 626   full_name = fn(name)
 627   if full_name is None:
 628     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 629                                errors.ECODE_NOENT)
 630   return full_name
 631
 632
 633 def _ExpandNodeName(cfg, name):
 634   """Wrapper over L{_ExpandItemName} for nodes."""
 635   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 636
 637
 638 def _ExpandInstanceName(cfg, name):
 639   """Wrapper over L{_ExpandItemName} for instance."""
 640   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 641
 642
 643 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 644                           memory, vcpus, nics, disk_template, disks,
 645                           bep, hvp, hypervisor_name):
 646   """Builds instance related env variables for hooks
 647
 648   This builds the hook environment from individual variables.
 649
 650   @type name: string
 651   @param name: the name of the instance
 652   @type primary_node: string
 653   @param primary_node: the name of the instance's primary node
 654   @type secondary_nodes: list
 655   @param secondary_nodes: list of secondary nodes as strings
 656   @type os_type: string
 657   @param os_type: the name of the instance's OS
 658   @type status: boolean
 659   @param status: the should_run status of the instance
 660   @type memory: string
 661   @param memory: the memory size of the instance
 662   @type vcpus: string
 663   @param vcpus: the count of VCPUs the instance has
 664   @type nics: list
 665   @param nics: list of tuples (ip, mac, mode, link) representing
 666       the NICs the instance has
 667   @type disk_template: string
 668   @param disk_template: the disk template of the instance
 669   @type disks: list
 670   @param disks: the list of (size, mode) pairs
 671   @type bep: dict
 672   @param bep: the backend parameters for the instance
 673   @type hvp: dict
 674   @param hvp: the hypervisor parameters for the instance
 675   @type hypervisor_name: string
 676   @param hypervisor_name: the hypervisor for the instance
 677   @rtype: dict
 678   @return: the hook environment for this instance
 679
 680   """
 681   if status:
 682     str_status = "up"
 683   else:
 684     str_status = "down"
 685   env = {
 686     "OP_TARGET": name,
 687     "INSTANCE_NAME": name,
 688     "INSTANCE_PRIMARY": primary_node,
 689     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 690     "INSTANCE_OS_TYPE": os_type,
 691     "INSTANCE_STATUS": str_status,
 692     "INSTANCE_MEMORY": memory,
 693     "INSTANCE_VCPUS": vcpus,
 694     "INSTANCE_DISK_TEMPLATE": disk_template,
 695     "INSTANCE_HYPERVISOR": hypervisor_name,
 696   }
 697
 698   if nics:
 699     nic_count = len(nics)
 700     for idx, (ip, mac, mode, link) in enumerate(nics):
 701       if ip is None:
 702         ip = ""
 703       env["INSTANCE_NIC%d_IP" % idx] = ip
 704       env["INSTANCE_NIC%d_MAC" % idx] = mac
 705       env["INSTANCE_NIC%d_MODE" % idx] = mode
 706       env["INSTANCE_NIC%d_LINK" % idx] = link
 707       if mode == constants.NIC_MODE_BRIDGED:
 708         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 709   else:
 710     nic_count = 0
 711
 712   env["INSTANCE_NIC_COUNT"] = nic_count
 713
 714   if disks:
 715     disk_count = len(disks)
 716     for idx, (size, mode) in enumerate(disks):
 717       env["INSTANCE_DISK%d_SIZE" % idx] = size
 718       env["INSTANCE_DISK%d_MODE" % idx] = mode
 719   else:
 720     disk_count = 0
 721
 722   env["INSTANCE_DISK_COUNT"] = disk_count
 723
 724   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 725     for key, value in source.items():
 726       env["INSTANCE_%s_%s" % (kind, key)] = value
 727
 728   return env
 729
 730
 731 def _NICListToTuple(lu, nics):
 732   """Build a list of nic information tuples.
 733
 734   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 735   value in LUQueryInstanceData.
 736
 737   @type lu:  L{LogicalUnit}
 738   @param lu: the logical unit on whose behalf we execute
 739   @type nics: list of L{objects.NIC}
 740   @param nics: list of nics to convert to hooks tuples
 741
 742   """
 743   hooks_nics = []
 744   c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
 745   for nic in nics:
 746     ip = nic.ip
 747     mac = nic.mac
 748     filled_params = objects.FillDict(c_nicparams, nic.nicparams)
 749     mode = filled_params[constants.NIC_MODE]
 750     link = filled_params[constants.NIC_LINK]
 751     hooks_nics.append((ip, mac, mode, link))
 752   return hooks_nics
 753
 754
 755 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 756   """Builds instance related env variables for hooks from an object.
 757
 758   @type lu: L{LogicalUnit}
 759   @param lu: the logical unit on whose behalf we execute
 760   @type instance: L{objects.Instance}
 761   @param instance: the instance for which we should build the
 762       environment
 763   @type override: dict
 764   @param override: dictionary with key/values that will override
 765       our values
 766   @rtype: dict
 767   @return: the hook environment dictionary
 768
 769   """
 770   cluster = lu.cfg.GetClusterInfo()
 771   bep = cluster.FillBE(instance)
 772   hvp = cluster.FillHV(instance)
 773   args = {
 774     'name': instance.name,
 775     'primary_node': instance.primary_node,
 776     'secondary_nodes': instance.secondary_nodes,
 777     'os_type': instance.os,
 778     'status': instance.admin_up,
 779     'memory': bep[constants.BE_MEMORY],
 780     'vcpus': bep[constants.BE_VCPUS],
 781     'nics': _NICListToTuple(lu, instance.nics),
 782     'disk_template': instance.disk_template,
 783     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 784     'bep': bep,
 785     'hvp': hvp,
 786     'hypervisor_name': instance.hypervisor,
 787   }
 788   if override:
 789     args.update(override)
 790   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 791
 792
 793 def _AdjustCandidatePool(lu, exceptions):
 794   """Adjust the candidate pool after node operations.
 795
 796   """
 797   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 798   if mod_list:
 799     lu.LogInfo("Promoted nodes to master candidate role: %s",
 800                utils.CommaJoin(node.name for node in mod_list))
 801     for name in mod_list:
 802       lu.context.ReaddNode(name)
 803   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 804   if mc_now > mc_max:
 805     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 806                (mc_now, mc_max))
 807
 808
 809 def _DecideSelfPromotion(lu, exceptions=None):
 810   """Decide whether I should promote myself as a master candidate.
 811
 812   """
 813   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 814   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 815   # the new node will increase mc_max with one, so:
 816   mc_should = min(mc_should + 1, cp_size)
 817   return mc_now < mc_should
 818
 819
 820 def _CheckNicsBridgesExist(lu, target_nics, target_node,
 821                                profile=constants.PP_DEFAULT):
 822   """Check that the brigdes needed by a list of nics exist.
 823
 824   """
 825   c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
 826   paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
 827                 for nic in target_nics]
 828   brlist = [params[constants.NIC_LINK] for params in paramslist
 829             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 830   if brlist:
 831     result = lu.rpc.call_bridges_exist(target_node, brlist)
 832     result.Raise("Error checking bridges on destination node '%s'" %
 833                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 834
 835
 836 def _CheckInstanceBridgesExist(lu, instance, node=None):
 837   """Check that the brigdes needed by an instance exist.
 838
 839   """
 840   if node is None:
 841     node = instance.primary_node
 842   _CheckNicsBridgesExist(lu, instance.nics, node)
 843
 844
 845 def _CheckOSVariant(os_obj, name):
 846   """Check whether an OS name conforms to the os variants specification.
 847
 848   @type os_obj: L{objects.OS}
 849   @param os_obj: OS object to check
 850   @type name: string
 851   @param name: OS name passed by the user, to check for validity
 852
 853   """
 854   if not os_obj.supported_variants:
 855     return
 856   try:
 857     variant = name.split("+", 1)[1]
 858   except IndexError:
 859     raise errors.OpPrereqError("OS name must include a variant",
 860                                errors.ECODE_INVAL)
 861
 862   if variant not in os_obj.supported_variants:
 863     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
 864
 865
 866 def _GetNodeInstancesInner(cfg, fn):
 867   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
 868
 869
 870 def _GetNodeInstances(cfg, node_name):
 871   """Returns a list of all primary and secondary instances on a node.
 872
 873   """
 874
 875   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
 876
 877
 878 def _GetNodePrimaryInstances(cfg, node_name):
 879   """Returns primary instances on a node.
 880
 881   """
 882   return _GetNodeInstancesInner(cfg,
 883                                 lambda inst: node_name == inst.primary_node)
 884
 885
 886 def _GetNodeSecondaryInstances(cfg, node_name):
 887   """Returns secondary instances on a node.
 888
 889   """
 890   return _GetNodeInstancesInner(cfg,
 891                                 lambda inst: node_name in inst.secondary_nodes)
 892
 893
 894 def _GetStorageTypeArgs(cfg, storage_type):
 895   """Returns the arguments for a storage type.
 896
 897   """
 898   # Special case for file storage
 899   if storage_type == constants.ST_FILE:
 900     # storage.FileStorage wants a list of storage directories
 901     return [[cfg.GetFileStorageDir()]]
 902
 903   return []
 904
 905
 906 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
 907   faulty = []
 908
 909   for dev in instance.disks:
 910     cfg.SetDiskID(dev, node_name)
 911
 912   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
 913   result.Raise("Failed to get disk status from node %s" % node_name,
 914                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 915
 916   for idx, bdev_status in enumerate(result.payload):
 917     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
 918       faulty.append(idx)
 919
 920   return faulty
 921
 922
 923 def _FormatTimestamp(secs):
 924   """Formats a Unix timestamp with the local timezone.
 925
 926   """
 927   return time.strftime("%F %T %Z", time.gmtime(secs))
 928
 929
 930 class LUPostInitCluster(LogicalUnit):
 931   """Logical unit for running hooks after cluster initialization.
 932
 933   """
 934   HPATH = "cluster-init"
 935   HTYPE = constants.HTYPE_CLUSTER
 936   _OP_REQP = []
 937
 938   def BuildHooksEnv(self):
 939     """Build hooks env.
 940
 941     """
 942     env = {"OP_TARGET": self.cfg.GetClusterName()}
 943     mn = self.cfg.GetMasterNode()
 944     return env, [], [mn]
 945
 946   def CheckPrereq(self):
 947     """No prerequisites to check.
 948
 949     """
 950     return True
 951
 952   def Exec(self, feedback_fn):
 953     """Nothing to do.
 954
 955     """
 956     return True
 957
 958
 959 class LUDestroyCluster(LogicalUnit):
 960   """Logical unit for destroying the cluster.
 961
 962   """
 963   HPATH = "cluster-destroy"
 964   HTYPE = constants.HTYPE_CLUSTER
 965   _OP_REQP = []
 966
 967   def BuildHooksEnv(self):
 968     """Build hooks env.
 969
 970     """
 971     env = {"OP_TARGET": self.cfg.GetClusterName()}
 972     return env, [], []
 973
 974   def CheckPrereq(self):
 975     """Check prerequisites.
 976
 977     This checks whether the cluster is empty.
 978
 979     Any errors are signaled by raising errors.OpPrereqError.
 980
 981     """
 982     master = self.cfg.GetMasterNode()
 983
 984     nodelist = self.cfg.GetNodeList()
 985     if len(nodelist) != 1 or nodelist[0] != master:
 986       raise errors.OpPrereqError("There are still %d node(s) in"
 987                                  " this cluster." % (len(nodelist) - 1),
 988                                  errors.ECODE_INVAL)
 989     instancelist = self.cfg.GetInstanceList()
 990     if instancelist:
 991       raise errors.OpPrereqError("There are still %d instance(s) in"
 992                                  " this cluster." % len(instancelist),
 993                                  errors.ECODE_INVAL)
 994
 995   def Exec(self, feedback_fn):
 996     """Destroys the cluster.
 997
 998     """
 999     master = self.cfg.GetMasterNode()
1000     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1001
1002     # Run post hooks on master node before it's removed
1003     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1004     try:
1005       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1006     except:
1007       # pylint: disable-msg=W0702
1008       self.LogWarning("Errors occurred running hooks on %s" % master)
1009
1010     result = self.rpc.call_node_stop_master(master, False)
1011     result.Raise("Could not disable the master role")
1012
1013     if modify_ssh_setup:
1014       priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1015       utils.CreateBackup(priv_key)
1016       utils.CreateBackup(pub_key)
1017
1018     return master
1019
1020
def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
  """Verifies certificate details for LUVerifyCluster.

  @param filename: certificate file name, used in the messages only
  @param expired: whether the certificate has expired
  @param not_before: start of validity as Unix timestamp, or None
  @param not_after: end of validity as Unix timestamp, or None
  @param now: the current time as Unix timestamp
  @param warn_days: remaining days at or below which a warning is given
  @param error_days: remaining days at or below which an error is given
  @return: tuple of (error type, message); both are None if there is
      nothing to report

  """
  if expired:
    # describe whatever part of the validity period is known
    if not_before is not None and not_after is not None:
      validity = (" (valid from %s to %s)" %
                  (_FormatTimestamp(not_before),
                   _FormatTimestamp(not_after)))
    elif not_before is not None:
      validity = " (valid from %s)" % _FormatTimestamp(not_before)
    elif not_after is not None:
      validity = " (valid until %s)" % _FormatTimestamp(not_after)
    else:
      validity = ""

    return (LUVerifyCluster.ETYPE_ERROR,
            ("Certificate %s is expired" % filename) + validity)

  if not_before is not None and not_before > now:
    return (LUVerifyCluster.ETYPE_WARNING,
            "Certificate %s not yet valid (valid from %s)" %
            (filename, _FormatTimestamp(not_before)))

  if not_after is None:
    # no expiration date known, nothing more to check
    return (None, None)

  remaining_days = int((not_after - now) / (24 * 3600))
  msg = ("Certificate %s expires in %d days" % (filename, remaining_days))

  # the stricter (error) threshold takes precedence over the warning one
  if remaining_days <= error_days:
    return (LUVerifyCluster.ETYPE_ERROR, msg)

  if remaining_days <= warn_days:
    return (LUVerifyCluster.ETYPE_WARNING, msg)

  return (None, None)
1058
1059
1060 def _VerifyCertificate(filename):
1061   """Verifies a certificate for LUVerifyCluster.
1062
1063   @type filename: string
1064   @param filename: Path to PEM file
1065
1066   """
1067   try:
1068     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1069                                            utils.ReadFile(filename))
1070   except Exception, err: # pylint: disable-msg=W0703
1071     return (LUVerifyCluster.ETYPE_ERROR,
1072             "Failed to load X509 certificate %s: %s" % (filename, err))
1073
1074   # Depending on the pyOpenSSL version, this can just return (None, None)
1075   (not_before, not_after) = utils.GetX509CertValidity(cert)
1076
1077   return _VerifyCertificateInner(filename, cert.has_expired(),
1078                                  not_before, not_after, time.time())
1079
1080
1081 class LUVerifyCluster(LogicalUnit):
1082   """Verifies the cluster status.
1083
1084   """
1085   HPATH = "cluster-verify"
1086   HTYPE = constants.HTYPE_CLUSTER
1087   _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1088   REQ_BGL = False
1089
1090   TCLUSTER = "cluster"
1091   TNODE = "node"
1092   TINSTANCE = "instance"
1093
1094   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1095   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1096   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1097   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1098   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1099   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1100   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1101   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1102   ENODEDRBD = (TNODE, "ENODEDRBD")
1103   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1104   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1105   ENODEHV = (TNODE, "ENODEHV")
1106   ENODELVM = (TNODE, "ENODELVM")
1107   ENODEN1 = (TNODE, "ENODEN1")
1108   ENODENET = (TNODE, "ENODENET")
1109   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1110   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1111   ENODERPC = (TNODE, "ENODERPC")
1112   ENODESSH = (TNODE, "ENODESSH")
1113   ENODEVERSION = (TNODE, "ENODEVERSION")
1114   ENODESETUP = (TNODE, "ENODESETUP")
1115   ENODETIME = (TNODE, "ENODETIME")
1116
1117   ETYPE_FIELD = "code"
1118   ETYPE_ERROR = "ERROR"
1119   ETYPE_WARNING = "WARNING"
1120
1121   class NodeImage(object):
1122     """A class representing the logical and physical status of a node.
1123
1124     @ivar volumes: a structure as returned from
1125         L{ganeti.backend.GetVolumeList} (runtime)
1126     @ivar instances: a list of running instances (runtime)
1127     @ivar pinst: list of configured primary instances (config)
1128     @ivar sinst: list of configured secondary instances (config)
1129     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1130         of this node (config)
1131     @ivar mfree: free memory, as reported by hypervisor (runtime)
1132     @ivar dfree: free disk, as reported by the node (runtime)
1133     @ivar offline: the offline status (config)
1134     @type rpc_fail: boolean
1135     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1136         not whether the individual keys were correct) (runtime)
1137     @type lvm_fail: boolean
1138     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1139     @type hyp_fail: boolean
1140     @ivar hyp_fail: whether the RPC call didn't return the instance list
1141     @type ghost: boolean
1142     @ivar ghost: whether this is a known node or not (config)
1143
1144     """
1145     def __init__(self, offline=False):
1146       self.volumes = {}
1147       self.instances = []
1148       self.pinst = []
1149       self.sinst = []
1150       self.sbp = {}
1151       self.mfree = 0
1152       self.dfree = 0
1153       self.offline = offline
1154       self.rpc_fail = False
1155       self.lvm_fail = False
1156       self.hyp_fail = False
1157       self.ghost = False
1158
1159   def ExpandNames(self):
1160     self.needed_locks = {
1161       locking.LEVEL_NODE: locking.ALL_SET,
1162       locking.LEVEL_INSTANCE: locking.ALL_SET,
1163     }
1164     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1165
1166   def _Error(self, ecode, item, msg, *args, **kwargs):
1167     """Format an error message.
1168
1169     Based on the opcode's error_codes parameter, either format a
1170     parseable error code, or a simpler error string.
1171
1172     This must be called only from Exec and functions called from Exec.
1173
1174     """
1175     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1176     itype, etxt = ecode
1177     # first complete the msg
1178     if args:
1179       msg = msg % args
1180     # then format the whole message
1181     if self.op.error_codes:
1182       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1183     else:
1184       if item:
1185         item = " " + item
1186       else:
1187         item = ""
1188       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1189     # and finally report it via the feedback_fn
1190     self._feedback_fn("  - %s" % msg)
1191
1192   def _ErrorIf(self, cond, *args, **kwargs):
1193     """Log an error message if the passed condition is True.
1194
1195     """
1196     cond = bool(cond) or self.op.debug_simulate_errors
1197     if cond:
1198       self._Error(*args, **kwargs)
1199     # do not mark the operation as failed for WARN cases only
1200     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1201       self.bad = self.bad or cond
1202
1203   def _VerifyNode(self, ninfo, nresult):
1204     """Run multiple tests against a node.
1205
1206     Test list:
1207
1208       - compares ganeti version
1209       - checks vg existence and size > 20G
1210       - checks config file checksum
1211       - checks ssh to other nodes
1212
1213     @type ninfo: L{objects.Node}
1214     @param ninfo: the node to check
1215     @param nresult: the results from the node
1216     @rtype: boolean
1217     @return: whether overall this call was successful (and we can expect
1218          reasonable values in the respose)
1219
1220     """
1221     node = ninfo.name
1222     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1223
1224     # main result, nresult should be a non-empty dict
1225     test = not nresult or not isinstance(nresult, dict)
1226     _ErrorIf(test, self.ENODERPC, node,
1227                   "unable to verify node: no data returned")
1228     if test:
1229       return False
1230
1231     # compares ganeti version
1232     local_version = constants.PROTOCOL_VERSION
1233     remote_version = nresult.get("version", None)
1234     test = not (remote_version and
1235                 isinstance(remote_version, (list, tuple)) and
1236                 len(remote_version) == 2)
1237     _ErrorIf(test, self.ENODERPC, node,
1238              "connection to node returned invalid data")
1239     if test:
1240       return False
1241
1242     test = local_version != remote_version[0]
1243     _ErrorIf(test, self.ENODEVERSION, node,
1244              "incompatible protocol versions: master %s,"
1245              " node %s", local_version, remote_version[0])
1246     if test:
1247       return False
1248
1249     # node seems compatible, we can actually try to look into its results
1250
1251     # full package version
1252     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1253                   self.ENODEVERSION, node,
1254                   "software version mismatch: master %s, node %s",
1255                   constants.RELEASE_VERSION, remote_version[1],
1256                   code=self.ETYPE_WARNING)
1257
1258     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1259     if isinstance(hyp_result, dict):
1260       for hv_name, hv_result in hyp_result.iteritems():
1261         test = hv_result is not None
1262         _ErrorIf(test, self.ENODEHV, node,
1263                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1264
1265
1266     test = nresult.get(constants.NV_NODESETUP,
1267                            ["Missing NODESETUP results"])
1268     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1269              "; ".join(test))
1270
1271     return True
1272
1273   def _VerifyNodeTime(self, ninfo, nresult,
1274                       nvinfo_starttime, nvinfo_endtime):
1275     """Check the node time.
1276
1277     @type ninfo: L{objects.Node}
1278     @param ninfo: the node to check
1279     @param nresult: the remote results for the node
1280     @param nvinfo_starttime: the start time of the RPC call
1281     @param nvinfo_endtime: the end time of the RPC call
1282
1283     """
1284     node = ninfo.name
1285     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1286
1287     ntime = nresult.get(constants.NV_TIME, None)
1288     try:
1289       ntime_merged = utils.MergeTime(ntime)
1290     except (ValueError, TypeError):
1291       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1292       return
1293
1294     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1295       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1296     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1297       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1298     else:
1299       ntime_diff = None
1300
1301     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1302              "Node time diverges by at least %s from master node time",
1303              ntime_diff)
1304
1305   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1306     """Check the node time.
1307
1308     @type ninfo: L{objects.Node}
1309     @param ninfo: the node to check
1310     @param nresult: the remote results for the node
1311     @param vg_name: the configured VG name
1312
1313     """
1314     if vg_name is None:
1315       return
1316
1317     node = ninfo.name
1318     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1319
1320     # checks vg existence and size > 20G
1321     vglist = nresult.get(constants.NV_VGLIST, None)
1322     test = not vglist
1323     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1324     if not test:
1325       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1326                                             constants.MIN_VG_SIZE)
1327       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1328
1329     # check pv names
1330     pvlist = nresult.get(constants.NV_PVLIST, None)
1331     test = pvlist is None
1332     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1333     if not test:
1334       # check that ':' is not present in PV names, since it's a
1335       # special character for lvcreate (denotes the range of PEs to
1336       # use on the PV)
1337       for _, pvname, owner_vg in pvlist:
1338         test = ":" in pvname
1339         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1340                  " '%s' of VG '%s'", pvname, owner_vg)
1341
1342   def _VerifyNodeNetwork(self, ninfo, nresult):
1343     """Check the node time.
1344
1345     @type ninfo: L{objects.Node}
1346     @param ninfo: the node to check
1347     @param nresult: the remote results for the node
1348
1349     """
1350     node = ninfo.name
1351     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1352
1353     test = constants.NV_NODELIST not in nresult
1354     _ErrorIf(test, self.ENODESSH, node,
1355              "node hasn't returned node ssh connectivity data")
1356     if not test:
1357       if nresult[constants.NV_NODELIST]:
1358         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1359           _ErrorIf(True, self.ENODESSH, node,
1360                    "ssh communication with node '%s': %s", a_node, a_msg)
1361
1362     test = constants.NV_NODENETTEST not in nresult
1363     _ErrorIf(test, self.ENODENET, node,
1364              "node hasn't returned node tcp connectivity data")
1365     if not test:
1366       if nresult[constants.NV_NODENETTEST]:
1367         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1368         for anode in nlist:
1369           _ErrorIf(True, self.ENODENET, node,
1370                    "tcp communication with node '%s': %s",
1371                    anode, nresult[constants.NV_NODENETTEST][anode])
1372
1373     test = constants.NV_MASTERIP not in nresult
1374     _ErrorIf(test, self.ENODENET, node,
1375              "node hasn't returned node master IP reachability data")
1376     if not test:
1377       if not nresult[constants.NV_MASTERIP]:
1378         if node == self.master_node:
1379           msg = "the master node cannot reach the master IP (not configured?)"
1380         else:
1381           msg = "cannot reach the master IP"
1382         _ErrorIf(True, self.ENODENET, node, msg)
1383
1384
1385   def _VerifyInstance(self, instance, instanceconfig, node_image):
1386     """Verify an instance.
1387
1388     This function checks to see if the required block devices are
1389     available on the instance's node.
1390
1391     """
1392     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1393     node_current = instanceconfig.primary_node
1394
1395     node_vol_should = {}
1396     instanceconfig.MapLVsByNode(node_vol_should)
1397
1398     for node in node_vol_should:
1399       n_img = node_image[node]
1400       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1401         # ignore missing volumes on offline or broken nodes
1402         continue
1403       for volume in node_vol_should[node]:
1404         test = volume not in n_img.volumes
1405         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1406                  "volume %s missing on node %s", volume, node)
1407
1408     if instanceconfig.admin_up:
1409       pri_img = node_image[node_current]
1410       test = instance not in pri_img.instances and not pri_img.offline
1411       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1412                "instance not running on its primary node %s",
1413                node_current)
1414
1415     for node, n_img in node_image.items():
1416       if (not node == node_current):
1417         test = instance in n_img.instances
1418         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1419                  "instance should not run on node %s", node)
1420
1421   def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1422     """Verify if there are any unknown volumes in the cluster.
1423
1424     The .os, .swap and backup volumes are ignored. All other volumes are
1425     reported as unknown.
1426
1427     """
1428     for node, n_img in node_image.items():
1429       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1430         # skip non-healthy nodes
1431         continue
1432       for volume in n_img.volumes:
1433         test = (node not in node_vol_should or
1434                 volume not in node_vol_should[node])
1435         self._ErrorIf(test, self.ENODEORPHANLV, node,
1436                       "volume %s is unknown", volume)
1437
1438   def _VerifyOrphanInstances(self, instancelist, node_image):
1439     """Verify the list of running instances.
1440
1441     This checks what instances are running but unknown to the cluster.
1442
1443     """
1444     for node, n_img in node_image.items():
1445       for o_inst in n_img.instances:
1446         test = o_inst not in instancelist
1447         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1448                       "instance %s on node %s should not exist", o_inst, node)
1449
1450   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1451     """Verify N+1 Memory Resilience.
1452
1453     Check that if one single node dies we can still start all the
1454     instances it was primary for.
1455
1456     """
1457     for node, n_img in node_image.items():
1458       # This code checks that every node which is now listed as
1459       # secondary has enough memory to host all instances it is
1460       # supposed to should a single other node in the cluster fail.
1461       # FIXME: not ready for failover to an arbitrary node
1462       # FIXME: does not support file-backed instances
1463       # WARNING: we currently take into account down instances as well
1464       # as up ones, considering that even if they're down someone
1465       # might want to start them even in the event of a node failure.
1466       for prinode, instances in n_img.sbp.items():
1467         needed_mem = 0
1468         for instance in instances:
1469           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1470           if bep[constants.BE_AUTO_BALANCE]:
1471             needed_mem += bep[constants.BE_MEMORY]
1472         test = n_img.mfree < needed_mem
1473         self._ErrorIf(test, self.ENODEN1, node,
1474                       "not enough memory on to accommodate"
1475                       " failovers should peer node %s fail", prinode)
1476
1477   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1478                        master_files):
1479     """Verifies and computes the node required file checksums.
1480
1481     @type ninfo: L{objects.Node}
1482     @param ninfo: the node to check
1483     @param nresult: the remote results for the node
1484     @param file_list: required list of files
1485     @param local_cksum: dictionary of local files and their checksums
1486     @param master_files: list of files that only masters should have
1487
1488     """
1489     node = ninfo.name
1490     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1491
1492     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1493     test = not isinstance(remote_cksum, dict)
1494     _ErrorIf(test, self.ENODEFILECHECK, node,
1495              "node hasn't returned file checksum data")
1496     if test:
1497       return
1498
1499     for file_name in file_list:
1500       node_is_mc = ninfo.master_candidate
1501       must_have = (file_name not in master_files) or node_is_mc
1502       # missing
1503       test1 = file_name not in remote_cksum
1504       # invalid checksum
1505       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1506       # existing and good
1507       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1508       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1509                "file '%s' missing", file_name)
1510       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1511                "file '%s' has wrong checksum", file_name)
1512       # not candidate and this is not a must-have file
1513       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1514                "file '%s' should not exist on non master"
1515                " candidates (and the file is outdated)", file_name)
1516       # all good, except non-master/non-must have combination
1517       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1518                "file '%s' should not exist"
1519                " on non master candidates", file_name)
1520
1521   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1522     """Verifies and the node DRBD status.
1523
1524     @type ninfo: L{objects.Node}
1525     @param ninfo: the node to check
1526     @param nresult: the remote results for the node
1527     @param instanceinfo: the dict of instances
1528     @param drbd_map: the DRBD map as returned by
1529         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1530
1531     """
1532     node = ninfo.name
1533     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1534
1535     # compute the DRBD minors
1536     node_drbd = {}
1537     for minor, instance in drbd_map[node].items():
1538       test = instance not in instanceinfo
1539       _ErrorIf(test, self.ECLUSTERCFG, None,
1540                "ghost instance '%s' in temporary DRBD map", instance)
1541         # ghost instance should not be running, but otherwise we
1542         # don't give double warnings (both ghost instance and
1543         # unallocated minor in use)
1544       if test:
1545         node_drbd[minor] = (instance, False)
1546       else:
1547         instance = instanceinfo[instance]
1548         node_drbd[minor] = (instance.name, instance.admin_up)
1549
1550     # and now check them
1551     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1552     test = not isinstance(used_minors, (tuple, list))
1553     _ErrorIf(test, self.ENODEDRBD, node,
1554              "cannot parse drbd status file: %s", str(used_minors))
1555     if test:
1556       # we cannot check drbd status
1557       return
1558
1559     for minor, (iname, must_exist) in node_drbd.items():
1560       test = minor not in used_minors and must_exist
1561       _ErrorIf(test, self.ENODEDRBD, node,
1562                "drbd minor %d of instance %s is not active", minor, iname)
1563     for minor in used_minors:
1564       test = minor not in node_drbd
1565       _ErrorIf(test, self.ENODEDRBD, node,
1566                "unallocated drbd minor %d is in use", minor)
1567
1568   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1569     """Verifies and updates the node volume data.
1570
1571     This function will update a L{NodeImage}'s internal structures
1572     with data from the remote call.
1573
1574     @type ninfo: L{objects.Node}
1575     @param ninfo: the node to check
1576     @param nresult: the remote results for the node
1577     @param nimg: the node image object
1578     @param vg_name: the configured VG name
1579
1580     """
1581     node = ninfo.name
1582     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1583
1584     nimg.lvm_fail = True
1585     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1586     if vg_name is None:
1587       pass
1588     elif isinstance(lvdata, basestring):
1589       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1590                utils.SafeEncode(lvdata))
1591     elif not isinstance(lvdata, dict):
1592       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1593     else:
1594       nimg.volumes = lvdata
1595       nimg.lvm_fail = False
1596
1597   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1598     """Verifies and updates the node instance list.
1599
1600     If the listing was successful, then updates this node's instance
1601     list. Otherwise, it marks the RPC call as failed for the instance
1602     list key.
1603
1604     @type ninfo: L{objects.Node}
1605     @param ninfo: the node to check
1606     @param nresult: the remote results for the node
1607     @param nimg: the node image object
1608
1609     """
1610     idata = nresult.get(constants.NV_INSTANCELIST, None)
1611     test = not isinstance(idata, list)
1612     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1613                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1614     if test:
1615       nimg.hyp_fail = True
1616     else:
1617       nimg.instances = idata
1618
1619   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1620     """Verifies and computes a node information map
1621
1622     @type ninfo: L{objects.Node}
1623     @param ninfo: the node to check
1624     @param nresult: the remote results for the node
1625     @param nimg: the node image object
1626     @param vg_name: the configured VG name
1627
1628     """
1629     node = ninfo.name
1630     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1631
1632     # try to read free memory (from the hypervisor)
1633     hv_info = nresult.get(constants.NV_HVINFO, None)
1634     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1635     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1636     if not test:
1637       try:
1638         nimg.mfree = int(hv_info["memory_free"])
1639       except (ValueError, TypeError):
1640         _ErrorIf(True, self.ENODERPC, node,
1641                  "node returned invalid nodeinfo, check hypervisor")
1642
1643     # FIXME: devise a free space model for file based instances as well
1644     if vg_name is not None:
1645       test = (constants.NV_VGLIST not in nresult or
1646               vg_name not in nresult[constants.NV_VGLIST])
1647       _ErrorIf(test, self.ENODELVM, node,
1648                "node didn't return data for the volume group '%s'"
1649                " - it is either missing or broken", vg_name)
1650       if not test:
1651         try:
1652           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1653         except (ValueError, TypeError):
1654           _ErrorIf(True, self.ENODERPC, node,
1655                    "node returned invalid LVM info, check LVM status")
1656
1657   def CheckPrereq(self):
1658     """Check prerequisites.
1659
1660     Transform the list of checks we're going to skip into a set and check that
1661     all its members are valid.
1662
1663     """
1664     self.skip_set = frozenset(self.op.skip_checks)
1665     if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1666       raise errors.OpPrereqError("Invalid checks to be skipped specified",
1667                                  errors.ECODE_INVAL)
1668
1669   def BuildHooksEnv(self):
1670     """Build hooks env.
1671
1672     Cluster-Verify hooks just ran in the post phase and their failure makes
1673     the output be logged in the verify output and the verification to fail.
1674
1675     """
1676     all_nodes = self.cfg.GetNodeList()
1677     env = {
1678       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1679       }
1680     for node in self.cfg.GetAllNodesInfo().values():
1681       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1682
1683     return env, [], all_nodes
1684
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    The checks run in this order: global configuration and cluster
    certificates, per-node checks based on the answer to a single
    node_verify RPC, per-instance checks against the gathered node
    images, orphan volumes and instances and, unless skipped, N+1
    memory redundancy.

    @param feedback_fn: function used to send feedback back to the caller
    @rtype: boolean
    @return: the negation of C{self.bad}, which is reset to False when
        this method starts

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    # parameters of the node_verify RPC, keyed by NV_* constant; offline
    # nodes are left out of the node list and network test entries
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      }

    # the LVM and DRBD entries are only requested when a volume group is
    # configured
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      # nodes referenced by an instance but missing from the node list get
      # a placeholder image, so the lookups below cannot fail
      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage()
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      # sbp groups, per secondary node, the instances by their primary node
      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      # offline nodes are only counted, none of the checks below is run
      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      # note: the drained counter is only updated for nodes that are
      # neither the master nor a master candidate
      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      # a failed RPC is flagged in the node image and ends the checks for
      # this node
      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      # the _Update* helpers receive the node image together with the
      # node's answer
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image)
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_image)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    # the notes below are informational only, they do not touch self.bad
    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
1911
1912   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1913     """Analyze the post-hooks' result
1914
1915     This method analyses the hook result, handles it, and sends some
1916     nicely-formatted feedback back to the user.
1917
1918     @param phase: one of L{constants.HOOKS_PHASE_POST} or
1919         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1920     @param hooks_results: the results of the multi-node hooks rpc call
1921     @param feedback_fn: function used send feedback back to the caller
1922     @param lu_result: previous Exec result
1923     @return: the new Exec result, based on the previous result
1924         and hook results
1925
1926     """
1927     # We only really run POST phase hooks, and are only interested in
1928     # their results
1929     if phase == constants.HOOKS_PHASE_POST:
1930       # Used to change hooks' output to proper indentation
1931       indent_re = re.compile('^', re.M)
1932       feedback_fn("* Hooks Results")
1933       assert hooks_results, "invalid result from hooks"
1934
1935       for node_name in hooks_results:
1936         res = hooks_results[node_name]
1937         msg = res.fail_msg
1938         test = msg and not res.offline
1939         self._ErrorIf(test, self.ENODEHOOKS, node_name,
1940                       "Communication failure in hooks execution: %s", msg)
1941         if res.offline or msg:
1942           # No need to investigate payload if node is offline or gave an error.
1943           # override manually lu_result here as _ErrorIf only
1944           # overrides self.bad
1945           lu_result = 1
1946           continue
1947         for script, hkr, output in res.payload:
1948           test = hkr == constants.HKR_FAIL
1949           self._ErrorIf(test, self.ENODEHOOKS, node_name,
1950                         "Script %s failed, output:", script)
1951           if test:
1952             output = indent_re.sub('      ', output)
1953             feedback_fn("%s" % output)
1954             lu_result = 0
1955
1956       return lu_result
1957
1958
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node and instance locks, every level in shared mode.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict((level, 1) for level in locking.LEVELS)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances which are marked up and use a network mirrored disk
    template are considered.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    res_nodes = {}
    res_instances = []
    res_missing = {}
    result = (res_nodes, res_instances, res_missing)

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    # expected volumes, as a {(node, volume): instance} map
    expected = {}
    for inst in instances:
      if not inst.admin_up:
        continue
      if inst.disk_template not in constants.DTS_NET_MIRROR:
        continue
      lvs_by_node = {}
      inst.MapLVsByNode(lvs_by_node)
      for node, vol_list in lvs_by_node.iteritems():
        for vol in vol_list:
          expected[(node, vol)] = inst

    if not expected:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      err = node_res.fail_msg
      if err:
        logging.warning("Error enumerating LVs on node %s: %s", node, err)
        res_nodes[node] = err
        continue

      for lv_name, (_, _, is_online) in node_res.payload.items():
        # every volume found is removed from the expected map, whatever
        # its state
        owner = expected.pop((node, lv_name), None)
        if is_online or owner is None or owner.name in res_instances:
          continue
        res_instances.append(owner.name)

    # any leftover items in the expected map are missing LVs, let's
    # arrange the data better
    for key, inst in expected.iteritems():
      res_missing.setdefault(inst.name, []).append(key)

    return result
2040
2041
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance names and declare the needed locks.

    With an explicit instance list only those instances are locked, and
    the node locks are computed later in L{DeclareLocks}; with an empty
    list all nodes and instances are locked. All locks are shared.

    """
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    """Lock the primary nodes of the explicitly requested instances.

    """
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: whether the size of any (recursive) first child was changed

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    The sizes are queried on the primary node of each instance; disks
    whose recorded size differs from the reported one are corrected in
    the configuration.

    @param feedback_fn: function used to send feedback back to the caller
    @rtype: list
    @return: list of (instance name, disk index, new size) tuples, one
        for each correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      # the RPC gets copies, so that setting the disk IDs does not touch
      # the configuration objects
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      # Read the answer from the payload attribute, like the other RPC
      # consumers in this module do once fail_msg has been checked.
      # NOTE(review): 'data' is presumably the raw (status, payload)
      # envelope, whose length is unrelated to the number of disks.
      sizes = result.payload
      if len(sizes) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, sizes):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # the reported size is divided by 2**20 before being compared with
        # the recorded one
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
2160
2161
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre phase is limited to the master node, the post phase covers
    all nodes.

    """
    hook_env = {}
    hook_env["OP_TARGET"] = self.cfg.GetClusterName()
    hook_env["NEW_NAME"] = self.op.name
    master_node = self.cfg.GetMasterNode()
    return hook_env, [master_node], self.cfg.GetNodeList()

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved; the rename is refused if neither the resolved
    name nor its IP address differ from the current ones, or if a new IP
    address already answers on the node daemon port.

    """
    resolved = utils.GetHostInfo(self.op.name)
    target_name = resolved.name
    target_ip = resolved.ip
    self.ip = target_ip

    current_name = self.cfg.GetClusterName()
    current_ip = self.cfg.GetMasterIP()
    ip_changed = target_ip != current_ip

    if target_name == current_name and not ip_changed:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if ip_changed and utils.TcpPing(target_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 target_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = target_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped while the configuration and the known
    hosts file are updated, and started again in any case afterwards.

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file and copy it to all the other nodes
      known_hosts = constants.SSH_KNOWN_HOSTS_FILE
      ssh.WriteKnownHostsFile(self.cfg, known_hosts)
      targets = self.cfg.GetNodeList()
      if master in targets:
        targets.remove(master)
      upload = self.rpc.call_upload_file(targets,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in upload.iteritems():
        err = to_result.fail_msg
        if not err:
          continue
        self.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                             (constants.SSH_KNOWN_HOSTS_FILE, to_node, err))

    finally:
      start_result = self.rpc.call_node_start_master(master, False, False)
      err = start_result.fail_msg
      if err:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", err)
2244
2245
def _RecursiveCheckIfLVMBased(disk):
  """Tell whether a disk, or any disk below it, is a logical volume.

  The children are searched (depth first) before the disk itself is
  looked at.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  for child in (disk.children or ()):
    if _RecursiveCheckIfLVMBased(child):
      return True
  return disk.dev_type == constants.LD_LV
2260
2261
2262 class LUSetClusterParams(LogicalUnit):
2263   """Change the parameters of the cluster.
2264
2265   """
2266   HPATH = "cluster-modify"
2267   HTYPE = constants.HTYPE_CLUSTER
2268   _OP_REQP = []
2269   REQ_BGL = False
2270
2271   def CheckArguments(self):
2272     """Check parameters
2273
2274     """
2275     for attr in ["candidate_pool_size",
2276                  "uid_pool", "add_uids", "remove_uids"]:
2277       if not hasattr(self.op, attr):
2278         setattr(self.op, attr, None)
2279
2280     if self.op.candidate_pool_size is not None:
2281       try:
2282         self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2283       except (ValueError, TypeError), err:
2284         raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2285                                    str(err), errors.ECODE_INVAL)
2286       if self.op.candidate_pool_size < 1:
2287         raise errors.OpPrereqError("At least one master candidate needed",
2288                                    errors.ECODE_INVAL)
2289
2290     _CheckBooleanOpField(self.op, "maintain_node_health")
2291
2292     if self.op.uid_pool:
2293       uidpool.CheckUidPool(self.op.uid_pool)
2294
2295     if self.op.add_uids:
2296       uidpool.CheckUidPool(self.op.add_uids)
2297
2298     if self.op.remove_uids:
2299       uidpool.CheckUidPool(self.op.remove_uids)
2300
2301   def ExpandNames(self):
2302     # FIXME: in the future maybe other cluster params won't require checking on
2303     # all nodes to be modified.
2304     self.needed_locks = {
2305       locking.LEVEL_NODE: locking.ALL_SET,
2306     }
2307     self.share_locks[locking.LEVEL_NODE] = 1
2308
2309   def BuildHooksEnv(self):
2310     """Build hooks env.
2311
2312     """
2313     env = {
2314       "OP_TARGET": self.cfg.GetClusterName(),
2315       "NEW_VG_NAME": self.op.vg_name,
2316       }
2317     mn = self.cfg.GetMasterNode()
2318     return env, [mn], [mn]
2319
2320   def CheckPrereq(self):
2321     """Check prerequisites.
2322
2323     This checks whether the given params don't conflict and
2324     if the given volume group is valid.
2325
2326     """
2327     if self.op.vg_name is not None and not self.op.vg_name:
2328       instances = self.cfg.GetAllInstancesInfo().values()
2329       for inst in instances:
2330         for disk in inst.disks:
2331           if _RecursiveCheckIfLVMBased(disk):
2332             raise errors.OpPrereqError("Cannot disable lvm storage while"
2333                                        " lvm-based instances exist",
2334                                        errors.ECODE_INVAL)
2335
2336     node_list = self.acquired_locks[locking.LEVEL_NODE]
2337
2338     # if vg_name not None, checks given volume group on all nodes
2339     if self.op.vg_name:
2340       vglist = self.rpc.call_vg_list(node_list)
2341       for node in node_list:
2342         msg = vglist[node].fail_msg
2343         if msg:
2344           # ignoring down node
2345           self.LogWarning("Error while gathering data on node %s"
2346                           " (ignoring node): %s", node, msg)
2347           continue
2348         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2349                                               self.op.vg_name,
2350                                               constants.MIN_VG_SIZE)
2351         if vgstatus:
2352           raise errors.OpPrereqError("Error on node '%s': %s" %
2353                                      (node, vgstatus), errors.ECODE_ENVIRON)
2354
2355     self.cluster = cluster = self.cfg.GetClusterInfo()
2356     # validate params changes
2357     if self.op.beparams:
2358       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2359       self.new_beparams = objects.FillDict(
2360         cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2361
2362     if self.op.nicparams:
2363       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2364       self.new_nicparams = objects.FillDict(
2365         cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2366       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2367       nic_errors = []
2368
2369       # check all instances for consistency
2370       for instance in self.cfg.GetAllInstancesInfo().values():
2371         for nic_idx, nic in enumerate(instance.nics):
2372           params_copy = copy.deepcopy(nic.nicparams)
2373           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2374
2375           # check parameter syntax
2376           try:
2377             objects.NIC.CheckParameterSyntax(params_filled)
2378           except errors.ConfigurationError, err:
2379             nic_errors.append("Instance %s, nic/%d: %s" %
2380                               (instance.name, nic_idx, err))
2381
2382           # if we're moving instances to routed, check that they have an ip
2383           target_mode = params_filled[constants.NIC_MODE]
2384           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2385             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2386                               (instance.name, nic_idx))
2387       if nic_errors:
2388         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2389                                    "\n".join(nic_errors))
2390
2391     # hypervisor list/parameters
2392     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2393     if self.op.hvparams:
2394       if not isinstance(self.op.hvparams, dict):
2395         raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2396                                    errors.ECODE_INVAL)
2397       for hv_name, hv_dict in self.op.hvparams.items():
2398         if hv_name not in self.new_hvparams:
2399           self.new_hvparams[hv_name] = hv_dict
2400         else:
2401           self.new_hvparams[hv_name].update(hv_dict)
2402
2403     # os hypervisor parameters
2404     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2405     if self.op.os_hvp:
2406       if not isinstance(self.op.os_hvp, dict):
2407         raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2408                                    errors.ECODE_INVAL)
2409       for os_name, hvs in self.op.os_hvp.items():
2410         if not isinstance(hvs, dict):
2411           raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2412                                       " input"), errors.ECODE_INVAL)
2413         if os_name not in self.new_os_hvp:
2414           self.new_os_hvp[os_name] = hvs
2415         else:
2416           for hv_name, hv_dict in hvs.items():
2417             if hv_name not in self.new_os_hvp[os_name]:
2418               self.new_os_hvp[os_name][hv_name] = hv_dict
2419             else:
2420               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2421
2422     # changes to the hypervisor list
2423     if self.op.enabled_hypervisors is not None:
2424       self.hv_list = self.op.enabled_hypervisors
2425       if not self.hv_list:
2426         raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2427                                    " least one member",
2428                                    errors.ECODE_INVAL)
2429       invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2430       if invalid_hvs:
2431         raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2432                                    " entries: %s" %
2433                                    utils.CommaJoin(invalid_hvs),
2434                                    errors.ECODE_INVAL)
2435       for hv in self.hv_list:
2436         # if the hypervisor doesn't already exist in the cluster
2437         # hvparams, we initialize it to empty, and then (in both
2438         # cases) we make sure to fill the defaults, as we might not
2439         # have a complete defaults list if the hypervisor wasn't
2440         # enabled before
2441         if hv not in new_hvp:
2442           new_hvp[hv] = {}
2443         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2444         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2445     else:
2446       self.hv_list = cluster.enabled_hypervisors
2447
2448     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2449       # either the enabled list has changed, or the parameters have, validate
2450       for hv_name, hv_params in self.new_hvparams.items():
2451         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2452             (self.op.enabled_hypervisors and
2453              hv_name in self.op.enabled_hypervisors)):
2454           # either this is a new hypervisor, or its parameters have changed
2455           hv_class = hypervisor.GetHypervisor(hv_name)
2456           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2457           hv_class.CheckParameterSyntax(hv_params)
2458           _CheckHVParams(self, node_list, hv_name, hv_params)
2459
2460     if self.op.os_hvp:
2461       # no need to check any newly-enabled hypervisors, since the
2462       # defaults have already been checked in the above code-block
2463       for os_name, os_hvp in self.new_os_hvp.items():
2464         for hv_name, hv_params in os_hvp.items():
2465           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2466           # we need to fill in the new os_hvp on top of the actual hv_p
2467           cluster_defaults = self.new_hvparams.get(hv_name, {})
2468           new_osp = objects.FillDict(cluster_defaults, hv_params)
2469           hv_class = hypervisor.GetHypervisor(hv_name)
2470           hv_class.CheckParameterSyntax(new_osp)
2471           _CheckHVParams(self, node_list, hv_name, new_osp)
2472
2473
2474   def Exec(self, feedback_fn):
2475     """Change the parameters of the cluster.
2476
2477     """
2478     if self.op.vg_name is not None:
2479       new_volume = self.op.vg_name
2480       if not new_volume:
2481         new_volume = None
2482       if new_volume != self.cfg.GetVGName():
2483         self.cfg.SetVGName(new_volume)
2484       else:
2485         feedback_fn("Cluster LVM configuration already in desired"
2486                     " state, not changing")
2487     if self.op.hvparams:
2488       self.cluster.hvparams = self.new_hvparams
2489     if self.op.os_hvp:
2490       self.cluster.os_hvp = self.new_os_hvp
2491     if self.op.enabled_hypervisors is not None:
2492       self.cluster.hvparams = self.new_hvparams
2493       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2494     if self.op.beparams:
2495       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2496     if self.op.nicparams:
2497       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2498
2499     if self.op.candidate_pool_size is not None:
2500       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2501       # we need to update the pool size here, otherwise the save will fail
2502       _AdjustCandidatePool(self, [])
2503
2504     if self.op.maintain_node_health is not None:
2505       self.cluster.maintain_node_health = self.op.maintain_node_health
2506
2507     if self.op.add_uids is not None:
2508       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2509
2510     if self.op.remove_uids is not None:
2511       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2512
2513     if self.op.uid_pool is not None:
2514       self.cluster.uid_pool = self.op.uid_pool
2515
2516     self.cfg.Update(self.cluster, feedback_fn)
2517
2518
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Copy the ancillary cluster files to the other nodes.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This
  function uploads those of them that exist locally to every online node
  (plus any extra nodes given), skipping the master node itself. Upload
  failures are only logged as warnings.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  cfg = lu.cfg

  # 1. Gather target nodes
  master = cfg.GetNodeInfo(cfg.GetMasterNode())
  targets = cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    targets.extend(additional_nodes)
  if master.name in targets:
    targets.remove(master.name)

  # 2. Gather files to distribute
  to_copy = set([constants.ETC_HOSTS,
                 constants.SSH_KNOWN_HOSTS_FILE,
                 constants.RAPI_CERT_FILE,
                 constants.RAPI_USERS_FILE,
                 constants.CONFD_HMAC_KEY,
                ])

  # each enabled hypervisor can contribute files of its own
  for hv_name in cfg.GetClusterInfo().enabled_hypervisors:
    to_copy.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in to_copy:
    if not os.path.exists(fname):
      # files missing on this node are silently skipped
      continue
    upload = lu.rpc.call_upload_file(targets, fname)
    for to_node, to_result in upload.items():
      err = to_result.fail_msg
      if err:
        lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                           (fname, to_node, err))
2561
2562
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This LU re-submits the unchanged cluster object to the configuration
  and then redistributes the ancillary files.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks: all nodes, in shared mode.

    """
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to verify for this LU.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: callable passed on to the configuration update

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
2589
2590
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  The mirror status of the disks is queried on the instance's primary
  node, repeatedly, until no device reports a sync in progress.

  @param lu: the calling logical unit (its cfg, rpc, proc and logging
      methods are used)
  @param instance: the instance whose disks should be checked
  @param disks: optional list of disks to check; it is passed through
      _ExpandCheckDisks (presumably None selects all the instance's disks)
  @param oneshot: if True, do not wait for the sync to finish; only the
      short retries for degraded devices are still done
  @rtype: boolean
  @return: False if the last poll found a device that is degraded while
      not syncing, True otherwise (including when there is nothing to check)
  @raise errors.RemoteError: if the node could not be queried ten times
      in a row

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    # a successful query resets the count of consecutive failures
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      # only a degraded device which is not resyncing counts as degraded
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
          # keep the longest estimate over all devices, not just the one
          # of the device that happens to be listed last
          max_time = max(max_time, mstat.estimated_time)
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    # sleep at least one second: without an estimate (max_time == 0) the
    # loop would otherwise poll the node again without any pause
    time.sleep(min(60, max(1, max_time)))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
2663
2664
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that a device and its children are not degraded on a node.

  The device is looked up on the node only if we are on the primary node
  or the device reports that it is assembled on secondaries; a failed or
  empty lookup counts as inconsistent and is logged as a warning. The
  children are then checked recursively, stopping at the first failure.

  @param lu: the calling logical unit
  @param dev: the disk object to check
  @param node: the node on which to check the disk
  @param on_primary: whether the node is the primary node of the disk
  @param ldisk: if True, test the ldisk_status attribute (the local
      storage status) of this device instead of its is_degraded attribute
      (the overall non-ok status); the children are always checked via
      is_degraded
  @rtype: boolean
  @return: True if everything that was checked is fine, False otherwise

  """
  lu.cfg.SetDiskID(dev, node)

  healthy = True

  if on_primary or dev.AssembleOnSecondary():
    found = lu.rpc.call_blockdev_find(node, dev)
    err = found.fail_msg
    if err:
      lu.LogWarning("Can't find disk on node %s: %s", node, err)
      healthy = False
    elif not found.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      healthy = False
    elif ldisk:
      healthy = found.payload.ldisk_status == constants.LDS_OKAY
    else:
      healthy = not found.payload.is_degraded

  for child in dev.children or []:
    if not healthy:
      # nothing below can change the verdict anymore
      break
    healthy = _CheckDiskConsistency(lu, child, node, on_primary)

  return healthy
2697
2698
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    """Validate the request and declare the (currently empty) lock set.

    @raise errors.OpPrereqError: if specific OS names were requested, as
        selective queries are not supported

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing is checked here.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and lists of (path, status, diagnose, variants) tuples
        as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", []),
                                     (/srv/..., False, "invalid api", [])],
                           "node2": [(/srv/..., True, "", [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # first time we see this os: give every good node an empty
          # list, so that nodes lacking the os show up as well
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    @param feedback_fn: unused here
    @rtype: list
    @return: one row per OS name, each row being the list of the values
        of the requested output fields, in the requested order

    """
    valid_nodes = list(self.cfg.GetOnlineNodeList())
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        valid = True
        variants = None
        for osl in os_data.values():
          # an os is valid only if every node has it and the first entry
          # found on each node is valid; force a real boolean, as the
          # plain and/and chain could yield e.g. the empty list itself
          valid = bool(valid and osl and osl[0][1])
          if not valid:
            variants = None
            break
          if calc_variants:
            # keep only the variants supported on all the nodes
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
2812
2813
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    @return: the environment dictionary and the list of all nodes except
        the one being removed, used for both hook phases

    """
    target = self.op.node_name
    env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }
    hook_nodes = self.cfg.GetNodeList()
    if target in hook_nodes:
      hook_nodes.remove(target)
    else:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", target)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it's not the master
     - it does not have primary or secondary instances

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    full_name = _ExpandNodeName(cfg, self.op.node_name)
    self.op.node_name = full_name
    node = cfg.GetNodeInfo(full_name)
    assert node is not None

    all_instances = cfg.GetInstanceList()

    if node.name == cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for iname in all_instances:
      if node.name in cfg.GetInstanceInfo(iname).all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % iname,
                                   errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    Failures of the post hooks or of the remote cleanup are only logged
    as warnings.

    @param feedback_fn: unused here

    """
    node_name = self.node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node_name)

    # read the setting before the node is taken out of the configuration
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node_name])
    self.context.RemoveNode(node_name)

    # Run post hooks on the node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [node_name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node_name)

    leave_result = self.rpc.call_node_leave_cluster(node_name,
                                                    modify_ssh_setup)
    leave_err = leave_result.fail_msg
    if leave_err:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_err)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.RemoveHostFromEtcHosts(node_name)
      _RedistributeAncillaryFiles(self)
2906
2907
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  # fields read directly from the same-named attribute of the node object
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  # fields filled in from the reply of the node_info RPC call
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*([
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS))

  # (output field, key in the node_info payload) for the integer live fields
  _LIVE_INT_FIELDS = [
    ("mtotal", "memory_total"),
    ("mnode", "memory_dom0"),
    ("mfree", "memory_free"),
    ("dtotal", "vg_size"),
    ("dfree", "vg_free"),
    ("ctotal", "cpu_total"),
    ("cnodes", "cpu_nodes"),
    ("csockets", "cpu_sockets"),
    ]

  def ExpandNames(self):
    """Validate the output fields and compute the locks needed.

    Node locks (shared) are requested only if some non-static field was
    selected and the opcode asks for locking.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(self, self.op.names)

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  @staticmethod
  def _NodeRole(node, master_name):
    """Return the one-letter role of a node.

    The first matching condition wins: master ("M"), master candidate
    ("C"), drained ("D"), offline ("O"); anything else is "R".

    """
    if node.name == master_name:
      return "M"
    if node.master_candidate:
      return "C"
    if node.drained:
      return "D"
    if node.offline:
      return "O"
    return "R"

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    @param feedback_fn: unused here
    @rtype: list
    @return: one row per node (sorted via utils.NiceSort on the names),
        each row being the list of values of the requested output fields
    @raise errors.OpExecError: if, when not locking, a requested node is
        no longer present in the configuration

    """
    node_objs = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      names = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted == locking.ALL_SET:
      names = node_objs.keys()
    else:
      names = self.wanted
      missing = set(names).difference(node_objs.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)

    names = utils.NiceSort(names)
    nodes = [node_objs[name] for name in names]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      replies = self.rpc.call_node_info(names, self.cfg.GetVGName(),
                                        self.cfg.GetHypervisorType())
      for name in names:
        reply = replies[name]
        if reply.fail_msg or not reply.payload:
          # no usable answer: all live fields will come out as None
          live_data[name] = {}
          continue
        payload = reply.payload
        stats = {"bootid": payload.get('bootid', None)}
        for (field, key) in self._LIVE_INT_FIELDS:
          stats[field] = utils.TryConvert(int, payload.get(key, None))
        live_data[name] = stats
    else:
      live_data = dict.fromkeys(names, {})

    primaries = dict([(name, set()) for name in names])
    secondaries = dict([(name, set()) for name in names])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields.intersection(self.op.output_fields):
      # the instance data is read only if an instance field was requested
      for inst in self.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in primaries:
          primaries[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in secondaries:
            secondaries[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodes:
      row = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(primaries[node.name])
        elif field == "sinst_list":
          val = list(secondaries[node.name])
        elif field == "pinst_cnt":
          val = len(primaries[node.name])
        elif field == "sinst_cnt":
          val = len(secondaries[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = (node.name == master_node)
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          val = self._NodeRole(node, master_node)
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
3067
3068
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    """Validate the output fields and request shared node locks.

    All nodes are locked if no node was given, otherwise only the
    requested ones.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This only records the nodes whose locks were acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  @staticmethod
  def _VolumeOwner(node, vol, instances, lvs_by_inst):
    """Return the name of the instance using a volume, or '-' if none.

    @param node: the node on which the volume lives
    @param vol: the volume dictionary; its 'name' entry is looked up
    @param instances: the instance objects to search, in order
    @param lvs_by_inst: for each instance object, its MapLVsByNode() result

    """
    for inst in instances:
      per_node = lvs_by_inst[inst]
      if node in per_node and vol['name'] in per_node[node]:
        return inst.name
    return '-'

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    Offline nodes are skipped silently; nodes whose query failed are
    skipped with a warning.

    @param feedback_fn: unused here
    @rtype: list
    @return: one row per volume, each row being the list of the requested
        output fields, with all the values converted to strings

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    instances = [self.cfg.GetInstanceInfo(iname)
                 for iname in self.cfg.GetInstanceList()]
    lvs_by_inst = dict([(inst, inst.MapLVsByNode()) for inst in instances])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      err = nresult.fail_msg
      if err:
        self.LogWarning("Can't compute volume data on node %s: %s", node, err)
        continue

      # work on a sorted copy, leaving the RPC payload untouched
      for vol in sorted(nresult.payload, key=lambda vol: vol['dev']):
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            val = self._VolumeOwner(node, vol, instances, lvs_by_inst)
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        output.append(row)

    return output
3153
3154
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def CheckArguments(self):
    """Validate the storage type and the requested output fields.

    """
    _CheckStorageType(self.op.storage_type)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Request shared locks on the given nodes, or on all if none given.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This defaults the optional "name" opcode attribute to None and
    records the nodes whose locks were acquired.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes are skipped silently; nodes whose query failed are
    skipped with a warning.

    @param feedback_fn: unused here
    @rtype: list
    @return: one row per storage unit, ordered via utils.NiceSort by node
        and then by unit name, each row holding the requested output fields

    """
    wanted = self.op.output_fields

    # Always get name to sort by
    if constants.SF_NAME not in wanted:
      fields = [constants.SF_NAME] + wanted
    else:
      fields = wanted[:]

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [name for name in fields if name not in lu_only]

    # position of each field in the rows returned by the nodes
    field_idx = {}
    for (idx, name) in enumerate(fields):
      field_idx[name] = idx
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      err = nresult.fail_msg
      if err:
        self.LogWarning("Can't get storage data from node %s: %s", node, err)
        continue

      by_name = {}
      for row in nresult.payload:
        by_name[row[name_idx]] = row

      for name in utils.NiceSort(by_name.keys()):
        row = by_name[name]

        out = []

        for field in wanted:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
3247
3248
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the storage type.

    """
    # the expanded name must be stored back into the opcode, since the
    # lock declaration and the RPC call both read self.op.node_name
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

  def ExpandNames(self):
    """Request the lock of the node holding the storage unit.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the storage type supports modifications at all and
    that only modifiable fields are being changed.

    @raise errors.OpPrereqError: if either check fails

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    # any requested change outside the modifiable set is an error
    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Modifies the storage unit on the node.

    The changes are applied through the storage_modify RPC call; a failed
    call is reported via the result's Raise method.

    @param feedback_fn: unused here

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
3296
3297
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  The same LU also handles re-adding a node which is already present in
  the configuration (when C{self.op.readd} is set).

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def CheckArguments(self):
    """Normalizes the node name given in the opcode.

    """
    # validate/normalize the node name
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    @return: tuple of (environment dict, pre-hook node list, post-hook
        node list)

    """
    # primary_ip/secondary_ip are filled in on the opcode by CheckPrereq
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config (or, for a re-add, that
       it is in the config)
     - it is resolvable
     - its IP addresses do not conflict with those of existing nodes
     - its parameters (single/dual homed) matches the cluster
     - it is reachable via TCP ping on the node daemon port

    As a side effect it sets C{self.changed_primary_ip},
    C{self.master_candidate} and C{self.new_node}, which are used by Exec.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.GetHostInfo(node_name)

    node = dns_data.name
    # the primary IP always comes from name resolution; the secondary IP
    # is optional and defaults to the primary one
    primary_ip = self.op.primary_ip = dns_data.ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        # on re-add the secondary IP must be unchanged; a changed primary
        # IP is accepted and only recorded, so that Exec can apply it
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      # any overlap between the new addresses and the addresses of
      # another node is a conflict
      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    # a re-added node is passed as an exception to the self-promotion
    # decision
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    # re-adds reuse the node object from the configuration, new nodes get
    # a fresh object
    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    The steps are, in order: reset the node flags (re-add only), check
    the protocol version of the node, optionally copy the ssh keys and
    update /etc/hosts, verify the secondary IP on the node, run an
    ssh/hostname verification from the master and finally register the
    node in the configuration/context.

    @raise errors.OpExecError: on protocol version mismatch, if the node
        does not have the given secondary IP, or if the ssh/hostname
        verification fails

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      # the order of this list defines the positional arguments of the
      # call_node_add RPC below
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.AddHostToEtcHosts(new_node.name)

    # for dual-homed nodes, ask the node itself whether it owns the
    # secondary IP
    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    # only the master runs the verification, with the new node as the
    # single entry of the node list to check
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      # a non-empty payload is treated as failure: every entry is
      # reported to the user before aborting
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        # a failed demotion is only warned about, it does not abort the
        # re-add
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())
3527
3528
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  The flags handled are master_candidate, offline and drained; when
  auto_promote is given and the node might lose its master candidate
  role, all nodes are locked and the candidate pool is adjusted.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Validates the opcode flags and precomputes the requested transitions.

    @raise errors.OpPrereqError: if no modification was requested, or if
        more than one flag is being set to true

    """
    op = self.op
    op.node_name = _ExpandNodeName(self.cfg, op.node_name)
    for flag_name in ('master_candidate', 'offline', 'drained',
                      'auto_promote'):
      _CheckBooleanOpField(op, flag_name)

    requested = [op.offline, op.master_candidate, op.drained]
    if requested.count(None) == len(requested):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if requested.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # the flags are tri-state (None means "leave unchanged"), hence the
    # explicit comparisons against True/False below
    self.offline_or_drain = (op.offline == True or op.drained == True)
    self.deoffline_or_drain = (op.offline == False or op.drained == False)
    self.might_demote = (op.master_candidate == False or
                         self.offline_or_drain)

    # all nodes need to be locked only if we may have to promote others
    self.lock_all = op.auto_promote and self.might_demote

  def ExpandNames(self):
    """Declares either a lock on all nodes or on the target node only.

    """
    if self.lock_all:
      node_locks = locking.ALL_SET
    else:
      node_locks = self.op.node_name
    self.needed_locks = {locking.LEVEL_NODE: node_locks}

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node and on the modified node.

    """
    target = self.op.node_name
    hook_env = {
      "OP_TARGET": target,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    hook_nodes = [self.cfg.GetMasterNode(), target]
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This refuses flag changes on the master node, makes sure a possible
    demotion leaves enough master candidates (unless all nodes are
    locked), rejects promoting an offline/drained node and decides on
    self-promotion when the node is being de-offlined/de-drained.

    @raise errors.OpPrereqError: if any of the checks above fails

    """
    op = self.op
    node = self.node = self.cfg.GetNodeInfo(op.node_name)

    # we can't change the master's node flags
    flag_changes = [flag for flag in (op.master_candidate, op.drained,
                                      op.offline)
                    if flag is not None]
    if flag_changes and op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("The master role can be changed"
                                 " only via masterfailover",
                                 errors.ECODE_INVAL)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      mc_left, mc_wanted, _ = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_left < mc_wanted:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    if op.master_candidate == True:
      # the node must not remain offline or drained after this operation
      stays_offline = node.offline and not op.offline == False
      stays_drained = node.drained and not op.drained == False
      if stays_offline or stays_drained:
        raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                   " to master_candidate" % node.name,
                                   errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    consider_promotion = (self.deoffline_or_drain and
                          not self.offline_or_drain and
                          not op.master_candidate == True and
                          not node.master_candidate)
    if consider_promotion:
      op.master_candidate = _DecideSelfPromotion(self)
      if op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

  def Exec(self, feedback_fn):
    """Modifies a node.

    @return: list of (parameter, new value or explanation) pairs
        describing the changes that were made

    """
    node = self.node
    op = self.op

    def _DemoteRemotely():
      """Asks the node to demote itself; failures are only warned about."""
      fail_msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
      if fail_msg:
        self.LogWarning("Node failed to demote itself: %s" % fail_msg)

    changes = []
    mc_touched = False

    if op.offline is not None:
      node.offline = op.offline
      changes.append(("offline", str(op.offline)))
      if op.offline == True:
        # offlining implies losing the candidate role and the drained flag
        if node.master_candidate:
          node.master_candidate = False
          mc_touched = True
          changes.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          changes.append(("drained", "clear drained status due to offline"))

    if op.master_candidate is not None:
      node.master_candidate = op.master_candidate
      mc_touched = True
      changes.append(("master_candidate", str(op.master_candidate)))
      if op.master_candidate == False:
        _DemoteRemotely()

    if op.drained is not None:
      node.drained = op.drained
      changes.append(("drained", str(op.drained)))
      if op.drained == True:
        # draining implies losing the candidate role and the offline flag
        if node.master_candidate:
          node.master_candidate = False
          mc_touched = True
          changes.append(("master_candidate", "auto-demotion due to drain"))
          _DemoteRemotely()
        if node.offline:
          node.offline = False
          changes.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if mc_touched:
      self.context.ReaddNode(node)

    return changes
3690
3691
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and protects the master node.

    @raise errors.OpPrereqError: if the target is the master node and
        the force parameter was not set

    """
    target = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = target

    targets_master = (target == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    No locks are acquired: this is a last-resort option and it
    shouldn't block on other jobs.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the powercycle RPC

    """
    hv_type = self.cfg.GetHypervisorType()
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name, hv_type)
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
3731
3732
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """This LU needs no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """

  def Exec(self, feedback_fn):
    """Return cluster config.

    @rtype: dict
    @return: software/protocol versions together with the cluster-level
        settings; hypervisor parameters are restricted to the enabled
        hypervisors

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # per-OS hypervisor parameters, filtered just for enabled hypervisors
    os_hvp = {}
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = dict((hv_name, hv_params)
                             for hv_name, hv_params in hv_dict.items()
                             if hv_name in enabled_hvs)

    hvparams = {}
    for hv_name in enabled_hvs:
      hvparams[hv_name] = cluster.hvparams[hv_name]

    arch = (platform.architecture()[0], platform.machine())

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": arch,
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      # the first enabled hypervisor is reported as the default one
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      }
3792
3793
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    """Needs no locks; only validates the requested output fields.

    """
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """

  def Exec(self, feedback_fn):
    """Computes the value of each requested field.

    @rtype: list
    @return: the values, in the same order as C{self.op.output_fields}
    @raise errors.ParameterError: for a field which is not known

    """
    # one zero-argument callable per supported field; each value is only
    # computed when its field was actually requested
    getters = {
      "cluster_name": self.cfg.GetClusterName,
      "master_node": self.cfg.GetMasterNode,
      "drain_flag":
        lambda: os.path.exists(constants.JOB_QUEUE_DRAIN_FILE),
      "watcher_pause":
        lambda: utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE),
      }

    values = []
    for field in self.op.output_fields:
      if field not in getters:
        raise errors.ParameterError(field)
      values.append(getters[field]())
    return values
3835
3836
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance; its node locks are computed at node level.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Locks the nodes of the instance when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its primary
    node is online; it also defaults the C{ignore_size} opcode attribute
    to false when it is missing.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed while assembling the disks
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (activated, device_map) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if activated:
      return device_map

    raise errors.OpExecError("Cannot activate block devices")
3877
3878
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  The devices are first assembled on all nodes in secondary mode and
  then, in a second pass, on the primary node in primary mode.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors during the first pass
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: (disks_ok, device_info), where disks_ok is False if any
      assembly that is not ignored failed, and device_info is a list of
      (primary node, instance_visible_name, device path) tuples; the
      device path is None for disks which failed on the primary node

  """
  primary = instance.primary_node
  iname = instance.name

  def _ForAssembly(node_disk):
    """Returns the disk to send; a size-less copy if sizes are ignored."""
    if not ignore_size:
      return node_disk
    sizeless = node_disk.Copy()
    sizeless.UnsetSize()
    return sizeless

  device_info = []
  all_ok = True
  selected = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in selected:
    for node, node_disk in inst_disk.ComputeNodeTree(primary):
      node_disk = _ForAssembly(node_disk)
      lu.cfg.SetDiskID(node_disk, node)
      fail_msg = lu.rpc.call_blockdev_assemble(node, node_disk, iname,
                                               False).fail_msg
      if not fail_msg:
        continue
      lu.proc.LogWarning("Could not prepare block device %s on node %s"
                         " (is_primary=False, pass=1): %s",
                         inst_disk.iv_name, node, fail_msg)
      if not ignore_secondaries:
        all_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in selected:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(primary):
      if node != primary:
        continue
      node_disk = _ForAssembly(node_disk)
      lu.cfg.SetDiskID(node_disk, node)
      assembled = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      fail_msg = assembled.fail_msg
      if fail_msg:
        # failures on the primary node are never ignored
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, fail_msg)
        all_ok = False
      else:
        dev_path = assembled.payload

    device_info.append((primary, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for inst_disk in selected:
    lu.cfg.SetDiskID(inst_disk, primary)

  return all_ok, device_info
3965
3966
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  If the disks can't be assembled they are shut down again and an error
  is raised.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed as C{ignore_secondaries} to the disk assembly; a
      value of None also suppresses the '--force' hint on failure
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (assembled, _) = _AssembleInstanceDisks(lu, instance,
                                          ignore_secondaries=force)
  if assembled:
    return

  # roll back whatever was activated before reporting the failure
  _ShutdownInstanceDisks(lu, instance)
  hint_wanted = force is not None and not force
  if hint_wanted:
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
3980
3981
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance; its node locks are computed at node level.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Locks the nodes of the instance when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    The shutdown goes through the "safe" helper, i.e. it is preceded by
    a check that the instance is down.

    """
    _SafeShutdownInstanceDisks(self, self.instance)
4014
4015
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance, after a liveness check.

  The instance is first checked to be down; only if that check passes
  is the request handed over to L{_ShutdownInstanceDisks}.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: the disks to shut down (passed through unchanged)

  """
  # the check raises if the instance is not down, so we never get to
  # the shutdown in that case
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")

  _ShutdownInstanceDisks(lu, instance, disks=disks)
4025
4026
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks, or None for all disks of the instance
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on
  @raise errors.ProgrammerError: if any selected disk does not belong
      to the instance

  """
  # no explicit selection means all the disks of the instance
  if disks is None:
    return instance.disks

  if set(disks).issubset(instance.disks):
    return disks

  raise errors.ProgrammerError("Can only act on disks belonging to the"
                               " target instance")
4043
4044
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: the disks to shut down (or all, if None)
  @type ignore_primary: boolean
  @param ignore_primary: if true, errors on the primary node do not
      influence the result; errors on other nodes always do
  @rtype: boolean
  @return: False if any shutdown failure that is not ignored occurred,
      True otherwise

  """
  primary = instance.primary_node
  success = True

  for disk in _ExpandCheckDisks(instance, disks):
    for node, top_disk in disk.ComputeNodeTree(primary):
      lu.cfg.SetDiskID(top_disk, node)
      fail_msg = lu.rpc.call_blockdev_shutdown(node, top_disk).fail_msg
      if not fail_msg:
        continue
      # every failure is logged, even when it does not affect the result
      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, fail_msg)
      tolerated = ignore_primary and node == primary
      if not tolerated:
        success = False

  return success
4068
4069
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  The free memory reported by the node is compared against the
  requested amount; both a failure to obtain the information and an
  insufficient amount are reported as an OpPrereqError.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  node_result = lu.rpc.call_node_info([node], vg_name, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  available = node_result.payload.get('memory_free')
  # a missing or non-integer value means the node could not tell us
  if not isinstance(available, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, available),
                               errors.ECODE_ENVIRON)

  if available < requested:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, available),
                               errors.ECODE_NORES)
4105
4106
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Verifies that nodes report enough free disk space in the default VG.

  All given nodes are queried with a single C{call_node_info} RPC and
  the C{vg_free} entry of each reply is compared against the requested
  amount. If any node has less disk, or the information cannot be
  obtained from a node, an OpPrereqError exception is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  hv_name = lu.cfg.GetHypervisorType()
  replies = lu.rpc.call_node_info(nodenames, vg_name, hv_name)

  for name in nodenames:
    reply = replies[name]
    reply.Raise("Cannot get current information from node %s" % name,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    free_space = reply.payload.get("vg_free", None)
    if not isinstance(free_space, int):
      # a missing or non-integer value means we cannot judge the node
      err_text = ("Can't compute free disk space on node %s, result was"
                  " '%s'" % (name, free_space))
      raise errors.OpPrereqError(err_text, errors.ECODE_ENVIRON)

    if requested > free_space:
      err_text = ("Not enough disk space on target node %s: required %d MiB,"
                  " available %d MiB" % (name, requested, free_space))
      raise errors.OpPrereqError(err_text, errors.ECODE_NORES)
4141
4142
class LUStartupInstance(LogicalUnit):
  """Logical unit that starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegates name expansion and locking to _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list holds the master plus all nodes of the instance
    (primary and secondaries); the same list is returned twice.

    """
    hook_env = {"FORCE": self.op.force}
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_env, hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Besides checking that the instance is in the cluster, this
    type-checks any extra backend/hypervisor parameters given in the
    opcode, checks that the primary node is online and that the
    instance's bridges exist and, if the instance is not reported as
    running, that the primary node has enough free memory for it.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams: only validated when something was passed
    extra_bep = getattr(self.op, "beparams", {})
    self.beparams = extra_bep
    if extra_bep:
      if not isinstance(extra_bep, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected dict"
                                   % (type(extra_bep), ),
                                   errors.ECODE_INVAL)
      utils.ForceDictType(extra_bep, constants.BES_PARAMETER_TYPES)
      self.op.beparams = extra_bep

    # extra hvparams: only validated when something was passed
    extra_hvp = getattr(self.op, "hvparams", {})
    self.hvparams = extra_hvp
    if extra_hvp:
      if not isinstance(extra_hvp, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected dict"
                                   % (type(extra_hvp), ),
                                   errors.ECODE_INVAL)

      # the syntax check runs on cluster defaults, overridden by the
      # instance's own values and finally by the extra ones
      cluster_info = self.cfg.GetClusterInfo()
      utils.ForceDictType(extra_hvp, constants.HVS_PARAMETER_TYPES)
      merged_hvp = objects.FillDict(cluster_info.hvparams[inst.hypervisor],
                                    inst.hvparams)
      merged_hvp.update(extra_hvp)
      hypervisor.GetHypervisor(inst.hypervisor).CheckParameterSyntax(merged_hvp)
      _CheckHVParams(self, inst.all_nodes, inst.hypervisor, merged_hvp)
      self.op.hvparams = extra_hvp

    _CheckNodeOnline(self, inst.primary_node)

    filled_be = self.cfg.GetClusterInfo().FillBE(inst)
    # check bridges existence
    _CheckInstanceBridgesExist(self, inst)

    live_info = self.rpc.call_instance_info(inst.primary_node, inst.name,
                                            inst.hypervisor)
    live_info.Raise("Error checking node %s" % inst.primary_node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)
    if live_info.payload:
      # already running, no memory check needed
      return

    _CheckNodeFreeMemory(self, inst.primary_node,
                         "starting instance %s" % inst.name,
                         filled_be[constants.BE_MEMORY], inst.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked up in the configuration first; then its
    disks are started and the start RPC is sent to the primary node. If
    that RPC fails the disks are shut down again.

    @raise errors.OpExecError: if the start RPC reports a failure

    """
    inst = self.instance
    force_flag = self.op.force

    self.cfg.MarkInstanceUp(inst.name)
    _StartInstanceDisks(self, inst, force_flag)

    start_result = self.rpc.call_instance_start(inst.primary_node, inst,
                                                self.hvparams, self.beparams)
    failure = start_result.fail_msg
    if not failure:
      return

    _ShutdownInstanceDisks(self, inst)
    raise errors.OpExecError("Could not start instance: %s" % failure)
4243
4244
class LURebootInstance(LogicalUnit):
  """Logical unit that reboots an instance.

  Soft and hard reboots are handed to the C{call_instance_reboot} RPC;
  a full reboot is done as an instance shutdown, a disk shutdown/start
  cycle and a fresh instance start.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Stores the opcode's C{shutdown_timeout}, falling back to
    C{constants.DEFAULT_SHUTDOWN_TIMEOUT} when the opcode has none.

    """
    fallback = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout", fallback)

  def ExpandNames(self):
    """Validates the reboot type, then expands and locks the instance.

    @raise errors.ParameterError: if the reboot type is not one of the
        soft, hard or full constants

    """
    known_types = (constants.INSTANCE_REBOOT_SOFT,
                   constants.INSTANCE_REBOOT_HARD,
                   constants.INSTANCE_REBOOT_FULL)
    if self.op.reboot_type not in known_types:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  known_types)
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list holds the master plus all nodes of the instance
    (primary and secondaries); the same list is returned twice.

    """
    hook_env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_env, hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that its primary
    node is online and that its bridges exist.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, inst.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, inst)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    On success the instance is marked up in the configuration.

    @raise errors.OpExecError: if the instance cannot be started again
        during a full reboot

    """
    inst = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type
    pnode = inst.primary_node

    in_place_types = (constants.INSTANCE_REBOOT_SOFT,
                      constants.INSTANCE_REBOOT_HARD)
    if reboot_type in in_place_types:
      # soft/hard: the reboot RPC does the whole job
      for disk in inst.disks:
        self.cfg.SetDiskID(disk, pnode)
      reboot_result = self.rpc.call_instance_reboot(pnode, inst, reboot_type,
                                                    self.shutdown_timeout)
      reboot_result.Raise("Could not reboot instance")
    else:
      # full: stop the instance, cycle its disks and start it again
      stop_result = self.rpc.call_instance_shutdown(pnode, inst,
                                                    self.shutdown_timeout)
      stop_result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, inst)
      _StartInstanceDisks(self, inst, ignore_secondaries)
      start_result = self.rpc.call_instance_start(pnode, inst, None, None)
      failure = start_result.fail_msg
      if failure:
        _ShutdownInstanceDisks(self, inst)
        raise errors.OpExecError("Could not start instance for full reboot:"
                                 " %s" % failure)

    self.cfg.MarkInstanceUp(inst.name)
4333
4334
class LUShutdownInstance(LogicalUnit):
  """Logical unit that shuts down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Stores the opcode's C{timeout}, falling back to
    C{constants.DEFAULT_SHUTDOWN_TIMEOUT} when the opcode has none.

    """
    fallback = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.timeout = getattr(self.op, "timeout", fallback)

  def ExpandNames(self):
    """Delegates name expansion and locking to _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list holds the master plus all nodes of the instance
    (primary and secondaries); the same list is returned twice.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["TIMEOUT"] = self.timeout
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_env, hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node is online.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked down in the configuration before the
    shutdown RPC is sent. A failed RPC is only logged as a warning; the
    instance's disks are shut down in either case.

    """
    inst = self.instance
    pnode = inst.primary_node
    shutdown_timeout = self.timeout

    self.cfg.MarkInstanceDown(inst.name)
    stop_result = self.rpc.call_instance_shutdown(pnode, inst,
                                                  shutdown_timeout)
    failure = stop_result.fail_msg
    if failure:
      self.proc.LogWarning("Could not shutdown instance: %s" % failure)

    _ShutdownInstanceDisks(self, inst)
4390
4391
class LUReinstallInstance(LogicalUnit):
  """Logical unit that reinstalls an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegates name expansion and locking to _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list holds the master plus all nodes of the instance
    (primary and secondaries); the same list is returned twice.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_env, hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    that it is not diskless and, when a new OS type was requested, that
    the primary node has that OS. C{self.instance} is only set once all
    checks have passed.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot reinstall")

    # make sure the optional opcode attributes always exist
    new_os = getattr(self.op, "os_type", None)
    self.op.os_type = new_os
    force_variant = getattr(self.op, "force_variant", False)
    self.op.force_variant = force_variant

    if new_os is not None:
      # OS verification
      node_name = _ExpandNodeName(self.cfg, inst.primary_node)
      _CheckNodeHasOS(self, node_name, new_os, force_variant)

    self.instance = inst

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    If a new OS type was requested it is stored in the configuration
    first. The instance's disks are then started, the OS add RPC is run
    on the primary node, and the disks are shut down again whether or
    not the RPC succeeded.

    """
    inst = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      inst.os = new_os
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # NOTE(review): the opcode's debug_level is forwarded here, so the
      # old FIXME about passing the debug option looks resolved - confirm
      os_result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                                self.op.debug_level)
      os_result.Raise("Could not install OS for instance %s on node %s" %
                      (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
4461
4462
class LURecreateInstanceDisks(LogicalUnit):
  """Logical unit that recreates an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The C{disks} opcode parameter must be a list of non-negative
    integers.

    @raise errors.OpPrereqError: if C{disks} is not a list or holds an
        invalid entry

    """
    requested = self.op.disks
    if not isinstance(requested, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for entry in requested:
      if isinstance(entry, int) and entry >= 0:
        continue
      raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                 str(entry), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Delegates name expansion and locking to _ExpandAndLockInstance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list holds the master plus all nodes of the instance
    (primary and secondaries); the same list is returned twice.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_env, hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    that it is not diskless and that every requested disk index exists.
    An empty C{disks} list is replaced by the indices of all disks.
    C{self.instance} is only set once all checks have passed.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot recreate disks")

    if self.op.disks:
      for disk_idx in self.op.disks:
        if disk_idx >= len(inst.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" %
                                     disk_idx, errors.ECODE_INVAL)
    else:
      # nothing selected means all disks
      self.op.disks = range(len(inst.disks))

    self.instance = inst

  def Exec(self, feedback_fn):
    """Recreate the disks.

    Every disk index that was not passed in C{self.op.disks} is handed
    to C{_CreateDisks} in its C{to_skip} argument.

    """
    skipped = [disk_idx for disk_idx, _ in enumerate(self.instance.disks)
               if disk_idx not in self.op.disks]

    _CreateDisks(self, self.instance, to_skip=skipped)
4534
4535
class LURenameInstance(LogicalUnit):
  """Logical unit that renames an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list holds the master plus all nodes of the instance
    (primary and secondaries); the same list is returned twice.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["INSTANCE_NEW_NAME"] = self.op.new_name
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_env, hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    and that the new name (as returned by C{utils.GetHostInfo}) is not
    already an instance of the cluster. Unless the opcode sets
    C{ignore_ip}, the new IP must also not answer a TCP ping on
    C{constants.DEFAULT_NODED_PORT}.

    """
    full_name = _ExpandInstanceName(self.cfg, self.op.instance_name)
    self.op.instance_name = full_name
    inst = self.cfg.GetInstanceInfo(full_name)
    assert inst is not None
    _CheckNodeOnline(self, inst.primary_node)
    _CheckInstanceDown(self, inst, "cannot rename")
    self.instance = inst

    # new name verification
    host_info = utils.GetHostInfo(self.op.new_name)
    new_name = host_info.name
    self.op.new_name = new_name

    if new_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    check_ip = not getattr(self.op, "ignore_ip", False)
    if check_ip and utils.TcpPing(host_info.ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                 (host_info.ip, new_name),
                                 errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The instance is renamed in the configuration and its lock is
    replaced by one for the new name. For file-based instances the
    storage directory is renamed on the primary node as well. Finally
    the OS rename script is run with the instance's disks started; a
    failure of that script is only logged as a warning.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      # remember the directory before the configuration changes
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(old_name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    lock_manager = self.context.glm
    lock_manager.remove(locking.LEVEL_INSTANCE, old_name)
    lock_manager.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
    pnode = inst.primary_node

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      dir_result = self.rpc.call_file_storage_dir_rename(pnode,
                                                         old_file_storage_dir,
                                                         new_file_storage_dir)
      dir_result.Raise("Could not rename on node %s directory '%s' to '%s'"
                       " (but the instance has been renamed in Ganeti)" %
                       (pnode, old_file_storage_dir, new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      script_result = self.rpc.call_instance_run_rename(pnode, inst, old_name,
                                                        self.op.debug_level)
      failure = script_result.fail_msg
      if failure:
        warning = ("Could not run OS rename script for instance %s on node %s"
                   " (but the instance has been renamed in Ganeti): %s" %
                   (inst.name, pnode, failure))
        self.proc.LogWarning(warning)
    finally:
      _ShutdownInstanceDisks(self, inst)
4625
4626
class LURemoveInstance(LogicalUnit):
  """Logical unit that removes an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Stores the opcode's C{shutdown_timeout}, falling back to
    C{constants.DEFAULT_SHUTDOWN_TIMEOUT} when the opcode has none.

    """
    fallback = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout", fallback)

  def ExpandNames(self):
    """Expands and locks the instance and prepares the node-level locks.

    The node lock list starts out empty and is flagged for
    recalculation with C{constants.LOCKS_REPLACE}.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Calls _LockInstancesNodes when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The first node list holds only the master; the second one holds
    all nodes of the instance followed by the master.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    master_only = [self.cfg.GetMasterNode()]
    with_instance_nodes = list(self.instance.all_nodes) + master_only
    return (hook_env, master_only, with_instance_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down, its disks are removed and it is dropped
    from the cluster configuration. With C{ignore_failures} set,
    shutdown and disk removal failures are only reported through
    C{feedback_fn}.

    @raise errors.OpExecError: if the shutdown or the disk removal
        fails and C{ignore_failures} is not set

    """
    inst = self.instance
    pnode = inst.primary_node
    logging.info("Shutting down instance %s on node %s", inst.name, pnode)

    stop_result = self.rpc.call_instance_shutdown(pnode, inst,
                                                  self.shutdown_timeout)
    failure = stop_result.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    logging.info("Removing block devices for instance %s", inst.name)

    disks_removed = _RemoveDisks(self, inst)
    if not disks_removed:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Can't remove instance's disks")
      feedback_fn("Warning: can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", inst.name)

    self.cfg.RemoveInstance(inst.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = inst.name
4705
4706
4707 class LUQueryInstances(NoHooksLU):
4708   """Logical unit for querying instances.
4709
4710   """
4711   # pylint: disable-msg=W0142
4712   _OP_REQP = ["output_fields", "names", "use_locking"]
4713   REQ_BGL = False
4714   _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4715                     "serial_no", "ctime", "mtime", "uuid"]
4716   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4717                                     "admin_state",
4718                                     "disk_template", "ip", "mac", "bridge",
4719                                     "nic_mode", "nic_link",
4720                                     "sda_size", "sdb_size", "vcpus", "tags",
4721                                     "network_port", "beparams",
4722                                     r"(disk)\.(size)/([0-9]+)",
4723                                     r"(disk)\.(sizes)", "disk_usage",
4724                                     r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4725                                     r"(nic)\.(bridge)/([0-9]+)",
4726                                     r"(nic)\.(macs|ips|modes|links|bridges)",
4727                                     r"(disk|nic)\.(count)",
4728                                     "hvparams",
4729                                     ] + _SIMPLE_FIELDS +
4730                                   ["hv/%s" % name
4731                                    for name in constants.HVS_PARAMETERS
4732                                    if name not in constants.HVC_GLOBALS] +
4733                                   ["be/%s" % name
4734                                    for name in constants.BES_PARAMETERS])
4735   _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4736
4737
4738   def ExpandNames(self):
4739     _CheckOutputFields(static=self._FIELDS_STATIC,
4740                        dynamic=self._FIELDS_DYNAMIC,
4741                        selected=self.op.output_fields)
4742
4743     self.needed_locks = {}
4744     self.share_locks[locking.LEVEL_INSTANCE] = 1
4745     self.share_locks[locking.LEVEL_NODE] = 1
4746
4747     if self.op.names:
4748       self.wanted = _GetWantedInstances(self, self.op.names)
4749     else:
4750       self.wanted = locking.ALL_SET
4751
4752     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4753     self.do_locking = self.do_node_query and self.op.use_locking
4754     if self.do_locking:
4755       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4756       self.needed_locks[locking.LEVEL_NODE] = []
4757       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4758
4759   def DeclareLocks(self, level):
4760     if level == locking.LEVEL_NODE and self.do_locking:
4761       self._LockInstancesNodes()
4762
4763   def CheckPrereq(self):
4764     """Check prerequisites.
4765
4766     """
4767     pass
4768
4769   def Exec(self, feedback_fn):
4770     """Computes the list of nodes and their attributes.
4771
4772     """
4773     # pylint: disable-msg=R0912
4774     # way too many branches here
4775     all_info = self.cfg.GetAllInstancesInfo()
4776     if self.wanted == locking.ALL_SET:
4777       # caller didn't specify instance names, so ordering is not important
4778       if self.do_locking:
4779         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4780       else:
4781         instance_names = all_info.keys()
4782       instance_names = utils.NiceSort(instance_names)
4783     else:
4784       # caller did specify names, so we must keep the ordering
4785       if self.do_locking:
4786         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4787       else:
4788         tgt_set = all_info.keys()
4789       missing = set(self.wanted).difference(tgt_set)
4790       if missing:
4791         raise errors.OpExecError("Some instances were removed before"
4792                                  " retrieving their data: %s" % missing)
4793       instance_names = self.wanted
4794
4795     instance_list = [all_info[iname] for iname in instance_names]
4796
4797     # begin data gathering
4798
4799     nodes = frozenset([inst.primary_node for inst in instance_list])
4800     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4801
4802     bad_nodes = []
4803     off_nodes = []
4804     if self.do_node_query:
4805       live_data = {}
4806       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4807       for name in nodes:
4808         result = node_data[name]
4809         if result.offline:
4810           # offline nodes will be in both lists
4811           off_nodes.append(name)
4812         if result.fail_msg:
4813           bad_nodes.append(name)
4814         else:
4815           if result.payload:
4816             live_data.update(result.payload)
4817           # else no instance is alive
4818     else:
4819       live_data = dict([(name, {}) for name in instance_names])
4820
4821     # end data gathering
4822
4823     HVPREFIX = "hv/"
4824     BEPREFIX = "be/"
4825     output = []
4826     cluster = self.cfg.GetClusterInfo()
4827     for instance in instance_list:
4828       iout = []
4829       i_hv = cluster.FillHV(instance, skip_globals=True)
4830       i_be = cluster.FillBE(instance)
4831       i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4832                                  nic.nicparams) for nic in instance.nics]
4833       for field in self.op.output_fields:
4834         st_match = self._FIELDS_STATIC.Matches(field)
4835         if field in self._SIMPLE_FIELDS:
4836           val = getattr(instance, field)
4837         elif field == "pnode":
4838           val = instance.primary_node
4839         elif field == "snodes":
4840           val = list(instance.secondary_nodes)
4841         elif field == "admin_state":
4842           val = instance.admin_up
4843         elif field == "oper_state":
4844           if instance.primary_node in bad_nodes:
4845             val = None
4846           else:
4847             val = bool(live_data.get(instance.name))
4848         elif field == "status":
4849           if instance.primary_node in off_nodes:
4850             val = "ERROR_nodeoffline"
4851           elif instance.primary_node in bad_nodes:
4852             val = "ERROR_nodedown"
4853           else:
4854             running = bool(live_data.get(instance.name))
4855             if running:
4856               if instance.admin_up:
4857                 val = "running"
4858               else:
4859                 val = "ERROR_up"
4860             else:
4861               if instance.admin_up:
4862                 val = "ERROR_down"
4863               else:
4864                 val = "ADMIN_down"
4865         elif field == "oper_ram":
4866           if instance.primary_node in bad_nodes:
4867             val = None
4868           elif instance.name in live_data:
4869             val = live_data[instance.name].get("memory", "?")
4870           else:
4871             val = "-"
4872         elif field == "vcpus":
4873           val = i_be[constants.BE_VCPUS]
4874         elif field == "disk_template":
4875           val = instance.disk_template
4876         elif field == "ip":
4877           if instance.nics:
4878             val = instance.nics[0].ip
4879           else:
4880             val = None
4881         elif field == "nic_mode":
4882           if instance.nics:
4883             val = i_nicp[0][constants.NIC_MODE]
4884           else:
4885             val = None
4886         elif field == "nic_link":
4887           if instance.nics:
4888             val = i_nicp[0][constants.NIC_LINK]
4889           else:
4890             val = None
4891         elif field == "bridge":
4892           if (instance.nics and
4893               i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4894             val = i_nicp[0][constants.NIC_LINK]
4895           else:
4896             val = None
4897         elif field == "mac":
4898           if instance.nics:
4899             val = instance.nics[0].mac
4900           else:
4901             val = None
4902         elif field == "sda_size" or field == "sdb_size":
4903           idx = ord(field[2]) - ord('a')
4904           try:
4905             val = instance.FindDisk(idx).size
4906           except errors.OpPrereqError:
4907             val = None
4908         elif field == "disk_usage": # total disk usage per node
4909           disk_sizes = [{'size': disk.size} for disk in instance.disks]
4910           val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4911         elif field == "tags":
4912           val = list(instance.GetTags())
4913         elif field == "hvparams":
4914           val = i_hv
4915         elif (field.startswith(HVPREFIX) and
4916               field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4917               field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4918           val = i_hv.get(field[len(HVPREFIX):], None)
4919         elif field == "beparams":
4920           val = i_be
4921         elif (field.startswith(BEPREFIX) and
4922               field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4923           val = i_be.get(field[len(BEPREFIX):], None)
4924         elif st_match and st_match.groups():
4925           # matches a variable list
4926           st_groups = st_match.groups()
4927           if st_groups and st_groups[0] == "disk":
4928             if st_groups[1] == "count":
4929               val = len(instance.disks)
4930             elif st_groups[1] == "sizes":
4931               val = [disk.size for disk in instance.disks]
4932             elif st_groups[1] == "size":
4933               try:
4934                 val = instance.FindDisk(st_groups[2]).size
4935               except errors.OpPrereqError:
4936                 val = None
4937             else:
4938               assert False, "Unhandled disk parameter"
4939           elif st_groups[0] == "nic":
4940             if st_groups[1] == "count":
4941               val = len(instance.nics)
4942             elif st_groups[1] == "macs":
4943               val = [nic.mac for nic in instance.nics]
4944             elif st_groups[1] == "ips":
4945               val = [nic.ip for nic in instance.nics]
4946             elif st_groups[1] == "modes":
4947               val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4948             elif st_groups[1] == "links":
4949               val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4950             elif st_groups[1] == "bridges":
4951               val = []
4952               for nicp in i_nicp:
4953                 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4954                   val.append(nicp[constants.NIC_LINK])
4955                 else:
4956                   val.append(None)
4957             else:
4958               # index-based item
4959               nic_idx = int(st_groups[2])
4960               if nic_idx >= len(instance.nics):
4961                 val = None
4962               else:
4963                 if st_groups[1] == "mac":
4964                   val = instance.nics[nic_idx].mac
4965                 elif st_groups[1] == "ip":
4966                   val = instance.nics[nic_idx].ip
4967                 elif st_groups[1] == "mode":
4968                   val = i_nicp[nic_idx][constants.NIC_MODE]
4969                 elif st_groups[1] == "link":
4970                   val = i_nicp[nic_idx][constants.NIC_LINK]
4971                 elif st_groups[1] == "bridge":
4972                   nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4973                   if nic_mode == constants.NIC_MODE_BRIDGED:
4974                     val = i_nicp[nic_idx][constants.NIC_LINK]
4975                   else:
4976                     val = None
4977                 else:
4978                   assert False, "Unhandled NIC parameter"
4979           else:
4980             assert False, ("Declared but unhandled variable parameter '%s'" %
4981                            field)
4982         else:
4983           assert False, "Declared but unhandled parameter '%s'" % field
4984         iout.append(val)
4985       output.append(iout)
4986
4987     return output
4988
4989
class LUFailoverInstance(LogicalUnit):
  """Fail an instance over to its secondary node.

  The instance is shut down on its current primary node, the
  configuration is switched so that the first secondary node becomes the
  primary, and the instance is started there if it is marked as up.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Fill in the optional shutdown timeout.

    The opcode's shutdown_timeout is used when present, otherwise
    constants.DEFAULT_SHUTDOWN_TIMEOUT.

    """
    fallback = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout", fallback)

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declaration.

    The node lock list starts out empty and is flagged for replacement;
    it is filled in by DeclareLocks.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the instance's node locks when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre-hooks node list is the master plus the instance's secondary
    nodes; the post-hooks list additionally contains the old primary.

    """
    inst = self.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "OLD_SECONDARY": new_primary,
      "NEW_PRIMARY": new_primary,
      "NEW_SECONDARY": old_primary,
      }
    # the per-instance variables are merged last, as they take precedence
    env.update(_BuildInstanceHookEnvByObject(self, inst))
    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]
    return env, pre_nodes, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the instance uses a network-mirrored disk template and
    has a secondary node, that this node is online and not drained, that
    it has enough free memory (only if the instance is marked up) and
    that the instance's bridges exist on it.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    be_full = self.cfg.GetClusterInfo().FillBE(inst)
    if inst.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondaries = inst.secondary_nodes
    if not secondaries:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondaries[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if not inst.admin_up:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # the instance will be started, so the secondary needs enough memory
      reason = "failing over instance %s" % inst.name
      _CheckNodeFreeMemory(self, target_node, reason,
                           be_full[constants.BE_MEMORY], inst.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, inst, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    inst = self.instance

    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    if not inst.admin_up:
      feedback_fn("* not checking disk consistency as instance is not running")
    else:
      feedback_fn("* checking disk consistency between source and target")
      for disk in inst.disks:
        # for drbd, these are drbd over lvm
        consistent = _CheckDiskConsistency(self, disk, new_primary, False)
        if not (consistent or self.op.ignore_consistency):
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % disk.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 inst.name, old_primary)

    shutdown_res = self.rpc.call_instance_shutdown(old_primary, inst,
                                                   self.shutdown_timeout)
    shutdown_err = shutdown_res.fail_msg
    if shutdown_err:
      if not self.op.ignore_consistency:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, old_primary, shutdown_err))
      # the user asked to go on regardless of the state of the source node
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           inst.name, old_primary, old_primary, shutdown_err)

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, inst, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    inst.primary_node = new_primary
    # distribute new instance config to the other nodes
    self.cfg.Update(inst, feedback_fn)

    # Only start the instance if it's marked as up
    if not inst.admin_up:
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s", inst.name, new_primary)

    disks_ok, _ = _AssembleInstanceDisks(self, inst, ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    start_res = self.rpc.call_instance_start(new_primary, inst, None, None)
    start_err = start_res.fail_msg
    if start_err:
      # do not leave the disks active if the instance could not be started
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (inst.name, new_primary, start_err))
5142
5143
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  Unlike the failover, which shuts the instance down, the migration is
  done without shutting it down. The actual work is delegated to a
  TLMigrateInstance tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and set up the migration tasklet.

    """
    self._ExpandAndLockInstance()

    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 self.op.live, self.op.cleanup)
    # kept around as BuildHooksEnv reads the instance from the tasklet
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Declare the instance's node locks when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre-hooks node list is the master plus the instance's secondary
    nodes; the post-hooks list additionally contains the old primary.

    """
    inst = self._migrater.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, inst)
    env.update({
      "MIGRATE_LIVE": self.op.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": old_primary,
      "OLD_SECONDARY": new_primary,
      "NEW_PRIMARY": new_primary,
      "NEW_SECONDARY": old_primary,
      })
    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]
    return env, pre_nodes, post_nodes
5193
5194
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its primary node, new disks are created
  on the target node and filled from the source node, the configuration
  is switched to the target node and the old disks are removed. The
  instance is started again only if it is marked as up.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Neither shutdown_timeout nor ignore_consistency is a required opcode
    parameter, so both get a default here.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
    # Exec consults this flag when the shutdown fails; reading it straight
    # from the opcode would raise AttributeError when the opcode does not
    # carry it, so default to the safe choice (abort the move)
    self.ignore_consistency = getattr(self.op, "ignore_consistency", False)

  def ExpandNames(self):
    """Lock the instance and the (expanded) target node.

    The instance's primary node is appended to the node locks in
    DeclareLocks.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the instance's primary node to the node locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the instance's primary node and the target
    node, both for the pre and the post phase.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the target node differs from the current primary
    node, that all disks are plain logical volumes or files, that the
    target node is online and not drained, that it has enough free memory
    (only if the instance is marked up) and that the instance's bridges
    exist on it.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # only simple (LV or file based) disks can be copied over
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      # NOTE(review): the messages below still talk about "failing over"
      # and the "secondary node" although this is a move; they are runtime
      # strings and are deliberately left unchanged here
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    A failed shutdown aborts the move with OpExecError unless the opcode
    carries a true ignore_consistency; failures while creating or
    copying the disks remove the new disks again and raise OpExecError.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # use the value defaulted in CheckArguments, as the opcode is not
      # required to have this parameter
      if self.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # always release the minors and propagate the failure, even if the
        # removal of the partially created disks failed too
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over; stop at the first failure
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # the copy error is what gets reported, whatever the cleanup did
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # the data is on the target node now, switch the configuration over
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        # do not leave the disks active if the instance could not be started
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
5375
5376
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  One TLMigrateInstance tasklet is created for every instance that has
  the node as its primary node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node name, create the tasklets and declare the locks.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.needed_locks = {
      locking.LEVEL_NODE: [node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # One migration tasklet (never in cleanup mode) per primary instance
    inst_names = []
    migrations = []

    for inst in _GetNodePrimaryInstances(self.cfg, node_name):
      logging.debug("Migrating instance %s", inst.name)
      inst_names.append(inst.name)
      migrations.append(TLMigrateInstance(self, inst.name,
                                          self.op.live, False))

    self.tasklets = migrations

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = inst_names

  def DeclareLocks(self, level):
    """Declare the instances' node locks when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list returned for both the pre and the post phase contains
    only the master node.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    master_only = [self.cfg.GetMasterNode()]

    return (env, master_only, master_only)
5427
5428
5429 class TLMigrateInstance(Tasklet):
5430   def __init__(self, lu, instance_name, live, cleanup):
5431     """Initializes this class.
5432
5433     """
5434     Tasklet.__init__(self, lu)
5435
5436     # Parameters
5437     self.instance_name = instance_name
5438     self.live = live
5439     self.cleanup = cleanup
5440
5441   def CheckPrereq(self):
5442     """Check prerequisites.
5443
5444     This checks that the instance is in the cluster.
5445
5446     """
5447     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5448     instance = self.cfg.GetInstanceInfo(instance_name)
5449     assert instance is not None
5450
5451     if instance.disk_template != constants.DT_DRBD8:
5452       raise errors.OpPrereqError("Instance's disk layout is not"
5453                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
5454
5455     secondary_nodes = instance.secondary_nodes
5456     if not secondary_nodes:
5457       raise errors.ConfigurationError("No secondary node but using"
5458                                       " drbd8 disk template")
5459
5460     i_be = self.cfg.GetClusterInfo().FillBE(instance)
5461
5462     target_node = secondary_nodes[0]
5463     # check memory requirements on the secondary node
5464     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5465                          instance.name, i_be[constants.BE_MEMORY],
5466                          instance.hypervisor)
5467
5468     # check bridge existance
5469     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5470
5471     if not self.cleanup:
5472       _CheckNodeNotDrained(self.lu, target_node)
5473       result = self.rpc.call_instance_migratable(instance.primary_node,
5474                                                  instance)
5475       result.Raise("Can't migrate, please use failover",
5476                    prereq=True, ecode=errors.ECODE_STATE)
5477
5478     self.instance = instance
5479
5480   def _WaitUntilSync(self):
5481     """Poll with custom rpc for disk sync.
5482
5483     This uses our own step-based rpc call.
5484
5485     """
5486     self.feedback_fn("* wait until resync is done")
5487     all_done = False
5488     while not all_done:
5489       all_done = True
5490       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5491                                             self.nodes_ip,
5492                                             self.instance.disks)
5493       min_percent = 100
5494       for node, nres in result.items():
5495         nres.Raise("Cannot resync disks on node %s" % node)
5496         node_done, node_percent = nres.payload
5497         all_done = all_done and node_done
5498         if node_percent is not None:
5499           min_percent = min(min_percent, node_percent)
5500       if not all_done:
5501         if min_percent < 100:
5502           self.feedback_fn("   - progress: %.1f%%" % min_percent)
5503         time.sleep(2)
5504
5505   def _EnsureSecondary(self, node):
5506     """Demote a node to secondary.
5507
5508     """
5509     self.feedback_fn("* switching node %s to secondary mode" % node)
5510
5511     for dev in self.instance.disks:
5512       self.cfg.SetDiskID(dev, node)
5513
5514     result = self.rpc.call_blockdev_close(node, self.instance.name,
5515                                           self.instance.disks)
5516     result.Raise("Cannot change disk to secondary on node %s" % node)
5517
5518   def _GoStandalone(self):
5519     """Disconnect from the network.
5520
5521     """
5522     self.feedback_fn("* changing into standalone mode")
5523     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5524                                                self.instance.disks)
5525     for node, nres in result.items():
5526       nres.Raise("Cannot disconnect disks node %s" % node)
5527
5528   def _GoReconnect(self, multimaster):
5529     """Reconnect to the network.
5530
5531     """
5532     if multimaster:
5533       msg = "dual-master"
5534     else:
5535       msg = "single-master"
5536     self.feedback_fn("* changing disks into %s mode" % msg)
5537     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5538                                            self.instance.disks,
5539                                            self.instance.name, multimaster)
5540     for node, nres in result.items():
5541       nres.Raise("Cannot change disks config on node %s" % node)
5542
5543   def _ExecCleanup(self):
5544     """Try to cleanup after a failed migration.
5545
5546     The cleanup is done by:
5547       - check that the instance is running only on one node
5548         (and update the config if needed)
5549       - change disks on its secondary node to secondary
5550       - wait until disks are fully synchronized
5551       - disconnect from the network
5552       - change disks into single-master mode
5553       - wait again until disks are fully synchronized
5554
5555     """
5556     instance = self.instance
5557     target_node = self.target_node
5558     source_node = self.source_node
5559
5560     # check running on only one node
5561     self.feedback_fn("* checking where the instance actually runs"
5562                      " (if this hangs, the hypervisor might be in"
5563                      " a bad state)")
5564     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5565     for node, result in ins_l.items():
5566       result.Raise("Can't contact node %s" % node)
5567
5568     runningon_source = instance.name in ins_l[source_node].payload
5569     runningon_target = instance.name in ins_l[target_node].payload
5570
5571     if runningon_source and runningon_target:
5572       raise errors.OpExecError("Instance seems to be running on two nodes,"
5573                                " or the hypervisor is confused. You will have"
5574                                " to ensure manually that it runs only on one"
5575                                " and restart this operation.")
5576
5577     if not (runningon_source or runningon_target):
5578       raise errors.OpExecError("Instance does not seem to be running at all."
5579                                " In this case, it's safer to repair by"
5580                                " running 'gnt-instance stop' to ensure disk"
5581                                " shutdown, and then restarting it.")
5582
5583     if runningon_target:
5584       # the migration has actually succeeded, we need to update the config
5585       self.feedback_fn("* instance running on secondary node (%s),"
5586                        " updating config" % target_node)
5587       instance.primary_node = target_node
5588       self.cfg.Update(instance, self.feedback_fn)
5589       demoted_node = source_node
5590     else:
5591       self.feedback_fn("* instance confirmed to be running on its"
5592                        " primary node (%s)" % source_node)
5593       demoted_node = target_node
5594
5595     self._EnsureSecondary(demoted_node)
5596     try:
5597       self._WaitUntilSync()
5598     except errors.OpExecError:
5599       # we ignore here errors, since if the device is standalone, it
5600       # won't be able to sync
5601       pass
5602     self._GoStandalone()
5603     self._GoReconnect(False)
5604     self._WaitUntilSync()
5605
5606     self.feedback_fn("* done")
5607
5608   def _RevertDiskStatus(self):
5609     """Try to revert the disk status after a failed migration.
5610
5611     """
5612     target_node = self.target_node
5613     try:
5614       self._EnsureSecondary(target_node)
5615       self._GoStandalone()
5616       self._GoReconnect(False)
5617       self._WaitUntilSync()
5618     except errors.OpExecError, err:
5619       self.lu.LogWarning("Migration failed and I can't reconnect the"
5620                          " drives: error '%s'\n"
5621                          "Please look and recover the instance status" %
5622                          str(err))
5623
5624   def _AbortMigration(self):
5625     """Call the hypervisor code to abort a started migration.
5626
5627     """
5628     instance = self.instance
5629     target_node = self.target_node
5630     migration_info = self.migration_info
5631
5632     abort_result = self.rpc.call_finalize_migration(target_node,
5633                                                     instance,
5634                                                     migration_info,
5635                                                     False)
5636     abort_msg = abort_result.fail_msg
5637     if abort_msg:
5638       logging.error("Aborting migration failed on target node %s: %s",
5639                     target_node, abort_msg)
5640       # Don't raise an exception here, as we stil have to try to revert the
5641       # disk status, even if this step failed.
5642
5643   def _ExecMigration(self):
5644     """Migrate an instance.
5645
5646     The migrate is done by:
5647       - change the disks into dual-master mode
5648       - wait until disks are fully synchronized again
5649       - migrate the instance
5650       - change disks on the new secondary node (the old primary) to secondary
5651       - wait until disks are fully synchronized
5652       - change disks into single-master mode
5653
5654     """
5655     instance = self.instance
5656     target_node = self.target_node
5657     source_node = self.source_node
5658
5659     self.feedback_fn("* checking disk consistency between source and target")
5660     for dev in instance.disks:
5661       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5662         raise errors.OpExecError("Disk %s is degraded or not fully"
5663                                  " synchronized on target node,"
5664                                  " aborting migrate." % dev.iv_name)
5665
5666     # First get the migration information from the remote node
5667     result = self.rpc.call_migration_info(source_node, instance)
5668     msg = result.fail_msg
5669     if msg:
5670       log_err = ("Failed fetching source migration information from %s: %s" %
5671                  (source_node, msg))
5672       logging.error(log_err)
5673       raise errors.OpExecError(log_err)
5674
5675     self.migration_info = migration_info = result.payload
5676
5677     # Then switch the disks to master/master mode
5678     self._EnsureSecondary(target_node)
5679     self._GoStandalone()
5680     self._GoReconnect(True)
5681     self._WaitUntilSync()
5682
5683     self.feedback_fn("* preparing %s to accept the instance" % target_node)
5684     result = self.rpc.call_accept_instance(target_node,
5685                                            instance,
5686                                            migration_info,
5687                                            self.nodes_ip[target_node])
5688
5689     msg = result.fail_msg
5690     if msg:
5691       logging.error("Instance pre-migration failed, trying to revert"
5692                     " disk status: %s", msg)
5693       self.feedback_fn("Pre-migration failed, aborting")
5694       self._AbortMigration()
5695       self._RevertDiskStatus()
5696       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5697                                (instance.name, msg))
5698
5699     self.feedback_fn("* migrating instance to %s" % target_node)
5700     time.sleep(10)
5701     result = self.rpc.call_instance_migrate(source_node, instance,
5702                                             self.nodes_ip[target_node],
5703                                             self.live)
5704     msg = result.fail_msg
5705     if msg:
5706       logging.error("Instance migration failed, trying to revert"
5707                     " disk status: %s", msg)
5708       self.feedback_fn("Migration failed, aborting")
5709       self._AbortMigration()
5710       self._RevertDiskStatus()
5711       raise errors.OpExecError("Could not migrate instance %s: %s" %
5712                                (instance.name, msg))
5713     time.sleep(10)
5714
5715     instance.primary_node = target_node
5716     # distribute new instance config to the other nodes
5717     self.cfg.Update(instance, self.feedback_fn)
5718
5719     result = self.rpc.call_finalize_migration(target_node,
5720                                               instance,
5721                                               migration_info,
5722                                               True)
5723     msg = result.fail_msg
5724     if msg:
5725       logging.error("Instance migration succeeded, but finalization failed:"
5726                     " %s", msg)
5727       raise errors.OpExecError("Could not finalize instance migration: %s" %
5728                                msg)
5729
5730     self._EnsureSecondary(source_node)
5731     self._WaitUntilSync()
5732     self._GoStandalone()
5733     self._GoReconnect(False)
5734     self._WaitUntilSync()
5735
5736     self.feedback_fn("* done")
5737
5738   def Exec(self, feedback_fn):
5739     """Perform the migration.
5740
5741     """
5742     feedback_fn("Migrating instance %s" % self.instance.name)
5743
5744     self.feedback_fn = feedback_fn
5745
5746     self.source_node = self.instance.primary_node
5747     self.target_node = self.instance.secondary_nodes[0]
5748     self.all_nodes = [self.source_node, self.target_node]
5749     self.nodes_ip = {
5750       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5751       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5752       }
5753
5754     if self.cleanup:
5755       return self._ExecCleanup()
5756     else:
5757       return self._ExecMigration()
5758
5759
5760 def _CreateBlockDev(lu, node, instance, device, force_create,
5761                     info, force_open):
5762   """Create a tree of block devices on a given node.
5763
5764   If this device type has to be created on secondaries, create it and
5765   all its children.
5766
5767   If not, just recurse to children keeping the same 'force' value.
5768
5769   @param lu: the lu on whose behalf we execute
5770   @param node: the node on which to create the device
5771   @type instance: L{objects.Instance}
5772   @param instance: the instance which owns the device
5773   @type device: L{objects.Disk}
5774   @param device: the device to create
5775   @type force_create: boolean
5776   @param force_create: whether to force creation of this device; this
5777       will be change to True whenever we find a device which has
5778       CreateOnSecondary() attribute
5779   @param info: the extra 'metadata' we should attach to the device
5780       (this will be represented as a LVM tag)
5781   @type force_open: boolean
5782   @param force_open: this parameter will be passes to the
5783       L{backend.BlockdevCreate} function where it specifies
5784       whether we run on primary or not, and it affects both
5785       the child assembly and the device own Open() execution
5786
5787   """
5788   if device.CreateOnSecondary():
5789     force_create = True
5790
5791   if device.children:
5792     for child in device.children:
5793       _CreateBlockDev(lu, node, instance, child, force_create,
5794                       info, force_open)
5795
5796   if not force_create:
5797     return
5798
5799   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5800
5801
5802 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5803   """Create a single block device on a given node.
5804
5805   This will not recurse over children of the device, so they must be
5806   created in advance.
5807
5808   @param lu: the lu on whose behalf we execute
5809   @param node: the node on which to create the device
5810   @type instance: L{objects.Instance}
5811   @param instance: the instance which owns the device
5812   @type device: L{objects.Disk}
5813   @param device: the device to create
5814   @param info: the extra 'metadata' we should attach to the device
5815       (this will be represented as a LVM tag)
5816   @type force_open: boolean
5817   @param force_open: this parameter will be passes to the
5818       L{backend.BlockdevCreate} function where it specifies
5819       whether we run on primary or not, and it affects both
5820       the child assembly and the device own Open() execution
5821
5822   """
5823   lu.cfg.SetDiskID(device, node)
5824   result = lu.rpc.call_blockdev_create(node, device, device.size,
5825                                        instance.name, force_open, info)
5826   result.Raise("Can't create block device %s on"
5827                " node %s for instance %s" % (device, node, instance.name))
5828   if device.physical_id is None:
5829     device.physical_id = result.payload
5830
5831
5832 def _GenerateUniqueNames(lu, exts):
5833   """Generate a suitable LV name.
5834
5835   This will generate a logical volume name for the given instance.
5836
5837   """
5838   results = []
5839   for val in exts:
5840     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5841     results.append("%s%s" % (new_id, val))
5842   return results
5843
5844
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Build a DRBD8 disk object together with its two LV children.

  A port and a shared secret are allocated from the configuration; the
  data volume gets the requested size while the metadata volume is fixed
  at 128.

  @param lu: the lu on whose behalf we execute
  @param primary: first node stored in the DRBD logical ID
  @param secondary: second node stored in the DRBD logical ID
  @param size: size of the DRBD device and of its data volume
  @param names: names of the data (index 0) and metadata (index 1) LVs
  @param iv_name: the iv_name of the resulting DRBD disk
  @param p_minor: minor stored for the primary node
  @param s_minor: minor stored for the secondary node
  @return: the L{objects.Disk} describing the DRBD8 device

  """
  cfg = lu.cfg
  port = cfg.AllocatePort()
  vgname = cfg.GetVGName()
  shared_secret = cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgname, names[0]))
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(vgname, names[1]))

  drbd_id = (primary, secondary, port, p_minor, s_minor, shared_secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)
5864
5865
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Build the list of disk objects for a given disk template.

  @param lu: the lu on whose behalf we execute
  @param template_name: one of the C{constants.DT_*} disk templates
  @param instance_name: passed to the DRBD minor allocation
  @param primary_node: primary node, used for DRBD disks
  @param secondary_nodes: must be empty for the plain and file templates
      and hold exactly one node for DRBD8
  @param disk_info: one dict per disk, with "size" and "mode" keys
  @param file_storage_dir: directory used in file-based logical IDs
  @param file_driver: driver used in file-based logical IDs
  @param base_index: number of the first disk; it is added to each
      disk's position to build the LV suffix and the "disk/N" iv_name
  @return: list of L{objects.Disk} (empty for the diskless template)
  @raise errors.ProgrammerError: on an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    suffixes = [".disk%d" % (base_index + offset)
                for offset in range(disk_count)]
    lv_names = _GenerateUniqueNames(lu, suffixes)
    for offset, spec in enumerate(disk_info):
      disks.append(objects.Disk(dev_type=constants.LD_LV, size=spec["size"],
                                logical_id=(vgname, lv_names[offset]),
                                iv_name="disk/%d" % (offset + base_index),
                                mode=spec["mode"]))
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk: one on the primary, one on the remote node
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    suffixes = [".disk%d" % (base_index + offset)
                for offset in range(disk_count)]
    # every DRBD disk needs a data and a metadata volume
    lv_pairs = [[prefix + "_data", prefix + "_meta"]
                for prefix in _GenerateUniqueNames(lu, suffixes)]
    for offset, spec in enumerate(disk_info):
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      spec["size"], lv_pairs[offset],
                                      "disk/%d" % (offset + base_index),
                                      minors[2 * offset],
                                      minors[2 * offset + 1])
      drbd_dev.mode = spec["mode"]
      disks.append(drbd_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for offset, spec in enumerate(disk_info):
      disk_number = offset + base_index
      file_path = "%s/disk%d" % (file_storage_dir, disk_number)
      disks.append(objects.Disk(dev_type=constants.LD_FILE,
                                size=spec["size"],
                                iv_name="disk/%d" % disk_number,
                                logical_id=(file_driver, file_path),
                                mode=spec["mode"]))
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks
5932
5933
5934 def _GetInstanceInfoText(instance):
5935   """Compute that text that should be added to the disk's metadata.
5936
5937   """
5938   return "originstname+%s" % instance.name
5939
5940
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance. For file-based
  instances the storage directory is created first. Nothing is returned;
  a failed directory creation is reported through the RPC result's
  Raise() method.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation;
      the disks are then created on that node only

  """
  info = _GetInstanceInfoText(instance)
  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    dir_result.Raise("Failed to create directory '%s' on node %s" %
                     (file_storage_dir, pnode))

  skipped = to_skip or ()
  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if idx in skipped:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      # creation and opening are only forced on the (effective) primary
      on_primary = node == pnode
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
5984
5985
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`). For file-based instances the storage directory
  is removed as well, after the devices.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: True if every removal succeeded, False if at least one
      device or the storage directory could not be removed

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      # only the top-level device, on the overriding node
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the removal was actually attempted on, which is
      # not the primary node when target_node overrides it
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
6033
6034
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: one of the C{constants.DT_*} disk templates
  @param disks: the disks, each a dict with a "size" key
  @return: the summed size needed in the volume group, or None for the
      diskless and file templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  plain_total = sum(d["size"] for d in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_total = sum(d["size"] + 128 for d in disks)

  # Required free disk space as a function of disk and swap space
  requirements = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
  }

  try:
    return requirements[disk_template]
  except KeyError:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)
6053
6054
6055 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6056   """Hypervisor parameter validation.
6057
6058   This function abstract the hypervisor parameter validation to be
6059   used in both instance create and instance modify.
6060
6061   @type lu: L{LogicalUnit}
6062   @param lu: the logical unit for which we check
6063   @type nodenames: list
6064   @param nodenames: the list of nodes on which we should check
6065   @type hvname: string
6066   @param hvname: the name of the hypervisor we should use
6067   @type hvparams: dict
6068   @param hvparams: the parameters which we need to check
6069   @raise errors.OpPrereqError: if the parameters are not valid
6070
6071   """
6072   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6073                                                   hvname,
6074                                                   hvparams)
6075   for node in nodenames:
6076     info = hvinfo[node]
6077     if info.offline:
6078       continue
6079     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6080
6081
6082 class LUCreateInstance(LogicalUnit):
6083   """Create an instance.
6084
6085   """
6086   HPATH = "instance-add"
6087   HTYPE = constants.HTYPE_INSTANCE
6088   _OP_REQP = ["instance_name", "disks",
6089               "mode", "start",
6090               "wait_for_sync", "ip_check", "nics",
6091               "hvparams", "beparams"]
6092   REQ_BGL = False
6093
6094   def CheckArguments(self):
6095     """Check arguments.
6096
6097     """
6098     # set optional parameters to none if they don't exist
6099     for attr in ["pnode", "snode", "iallocator", "hypervisor",
6100                  "disk_template", "identify_defaults"]:
6101       if not hasattr(self.op, attr):
6102         setattr(self.op, attr, None)
6103
6104     # do not require name_check to ease forward/backward compatibility
6105     # for tools
6106     if not hasattr(self.op, "name_check"):
6107       self.op.name_check = True
6108     if not hasattr(self.op, "no_install"):
6109       self.op.no_install = False
6110     if self.op.no_install and self.op.start:
6111       self.LogInfo("No-installation mode selected, disabling startup")
6112       self.op.start = False
6113     # validate/normalize the instance name
6114     self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6115     if self.op.ip_check and not self.op.name_check:
6116       # TODO: make the ip check more flexible and not depend on the name check
6117       raise errors.OpPrereqError("Cannot do ip checks without a name check",
6118                                  errors.ECODE_INVAL)
6119
6120     # check nics' parameter names
6121     for nic in self.op.nics:
6122       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6123
6124     # check disks. parameter names and consistent adopt/no-adopt strategy
6125     has_adopt = has_no_adopt = False
6126     for disk in self.op.disks:
6127       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6128       if "adopt" in disk:
6129         has_adopt = True
6130       else:
6131         has_no_adopt = True
6132     if has_adopt and has_no_adopt:
6133       raise errors.OpPrereqError("Either all disks are adopted or none is",
6134                                  errors.ECODE_INVAL)
6135     if has_adopt:
6136       if self.op.disk_template != constants.DT_PLAIN:
6137         raise errors.OpPrereqError("Disk adoption is only supported for the"
6138                                    " 'plain' disk template",
6139                                    errors.ECODE_INVAL)
6140       if self.op.iallocator is not None:
6141         raise errors.OpPrereqError("Disk adoption not allowed with an"
6142                                    " iallocator script", errors.ECODE_INVAL)
6143       if self.op.mode == constants.INSTANCE_IMPORT:
6144         raise errors.OpPrereqError("Disk adoption not allowed for"
6145                                    " instance import", errors.ECODE_INVAL)
6146
6147     self.adopt_disks = has_adopt
6148
6149     # verify creation mode
6150     if self.op.mode not in (constants.INSTANCE_CREATE,
6151                             constants.INSTANCE_IMPORT):
6152       raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6153                                  self.op.mode, errors.ECODE_INVAL)
6154
6155     # instance name verification
6156     if self.op.name_check:
6157       self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6158       self.op.instance_name = self.hostname1.name
6159       # used in CheckPrereq for ip ping check
6160       self.check_ip = self.hostname1.ip
6161     else:
6162       self.check_ip = None
6163
6164     # file storage checks
6165     if (self.op.file_driver and
6166         not self.op.file_driver in constants.FILE_DRIVER):
6167       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6168                                  self.op.file_driver, errors.ECODE_INVAL)
6169
6170     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6171       raise errors.OpPrereqError("File storage directory path not absolute",
6172                                  errors.ECODE_INVAL)
6173
6174     ### Node/iallocator related checks
6175     if [self.op.iallocator, self.op.pnode].count(None) != 1:
6176       raise errors.OpPrereqError("One and only one of iallocator and primary"
6177                                  " node must be given",
6178                                  errors.ECODE_INVAL)
6179
6180     if self.op.mode == constants.INSTANCE_IMPORT:
6181       # On import force_variant must be True, because if we forced it at
6182       # initial install, our only chance when importing it back is that it
6183       # works again!
6184       self.op.force_variant = True
6185
6186       if self.op.no_install:
6187         self.LogInfo("No-installation mode has no effect during import")
6188
6189     else: # INSTANCE_CREATE
6190       if getattr(self.op, "os_type", None) is None:
6191         raise errors.OpPrereqError("No guest OS specified",
6192                                    errors.ECODE_INVAL)
6193       self.op.force_variant = getattr(self.op, "force_variant", False)
6194       if self.op.disk_template is None:
6195         raise errors.OpPrereqError("No disk template specified",
6196                                    errors.ECODE_INVAL)
6197
6198   def ExpandNames(self):
6199     """ExpandNames for CreateInstance.
6200
6201     Figure out the right locks for instance creation.
6202
6203     """
6204     self.needed_locks = {}
6205
6206     instance_name = self.op.instance_name
6207     # this is just a preventive check, but someone might still add this
6208     # instance in the meantime, and creation will fail at lock-add time
6209     if instance_name in self.cfg.GetInstanceList():
6210       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6211                                  instance_name, errors.ECODE_EXISTS)
6212
6213     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6214
6215     if self.op.iallocator:
6216       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6217     else:
6218       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6219       nodelist = [self.op.pnode]
6220       if self.op.snode is not None:
6221         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6222         nodelist.append(self.op.snode)
6223       self.needed_locks[locking.LEVEL_NODE] = nodelist
6224
6225     # in case of import lock the source node too
6226     if self.op.mode == constants.INSTANCE_IMPORT:
6227       src_node = getattr(self.op, "src_node", None)
6228       src_path = getattr(self.op, "src_path", None)
6229
6230       if src_path is None:
6231         self.op.src_path = src_path = self.op.instance_name
6232
6233       if src_node is None:
6234         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6235         self.op.src_node = None
6236         if os.path.isabs(src_path):
6237           raise errors.OpPrereqError("Importing an instance from an absolute"
6238                                      " path requires a source node option.",
6239                                      errors.ECODE_INVAL)
6240       else:
6241         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6242         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6243           self.needed_locks[locking.LEVEL_NODE].append(src_node)
6244         if not os.path.isabs(src_path):
6245           self.op.src_path = src_path = \
6246             utils.PathJoin(constants.EXPORT_DIR, src_path)
6247
6248   def _RunAllocator(self):
6249     """Run the allocator based on input opcode.
6250
6251     """
6252     nics = [n.ToDict() for n in self.nics]
6253     ial = IAllocator(self.cfg, self.rpc,
6254                      mode=constants.IALLOCATOR_MODE_ALLOC,
6255                      name=self.op.instance_name,
6256                      disk_template=self.op.disk_template,
6257                      tags=[],
6258                      os=self.op.os_type,
6259                      vcpus=self.be_full[constants.BE_VCPUS],
6260                      mem_size=self.be_full[constants.BE_MEMORY],
6261                      disks=self.disks,
6262                      nics=nics,
6263                      hypervisor=self.op.hypervisor,
6264                      )
6265
6266     ial.Run(self.op.iallocator)
6267
6268     if not ial.success:
6269       raise errors.OpPrereqError("Can't compute nodes using"
6270                                  " iallocator '%s': %s" %
6271                                  (self.op.iallocator, ial.info),
6272                                  errors.ECODE_NORES)
6273     if len(ial.result) != ial.required_nodes:
6274       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6275                                  " of nodes (%s), required %s" %
6276                                  (self.op.iallocator, len(ial.result),
6277                                   ial.required_nodes), errors.ECODE_FAULT)
6278     self.op.pnode = ial.result[0]
6279     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6280                  self.op.instance_name, self.op.iallocator,
6281                  utils.CommaJoin(ial.result))
6282     if ial.required_nodes == 2:
6283       self.op.snode = ial.result[1]
6284
6285   def BuildHooksEnv(self):
6286     """Build hooks env.
6287
6288     This runs on master, primary and secondary nodes of the instance.
6289
6290     """
6291     env = {
6292       "ADD_MODE": self.op.mode,
6293       }
6294     if self.op.mode == constants.INSTANCE_IMPORT:
6295       env["SRC_NODE"] = self.op.src_node
6296       env["SRC_PATH"] = self.op.src_path
6297       env["SRC_IMAGES"] = self.src_images
6298
6299     env.update(_BuildInstanceHookEnv(
6300       name=self.op.instance_name,
6301       primary_node=self.op.pnode,
6302       secondary_nodes=self.secondaries,
6303       status=self.op.start,
6304       os_type=self.op.os_type,
6305       memory=self.be_full[constants.BE_MEMORY],
6306       vcpus=self.be_full[constants.BE_VCPUS],
6307       nics=_NICListToTuple(self, self.nics),
6308       disk_template=self.op.disk_template,
6309       disks=[(d["size"], d["mode"]) for d in self.disks],
6310       bep=self.be_full,
6311       hvp=self.hv_full,
6312       hypervisor_name=self.op.hypervisor,
6313     ))
6314
6315     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6316           self.secondaries)
6317     return env, nl, nl
6318
6319   def _ReadExportInfo(self):
6320     """Reads the export information from disk.
6321
6322     It will override the opcode source node and path with the actual
6323     information, if these two were not specified before.
6324
6325     @return: the export information
6326
6327     """
6328     assert self.op.mode == constants.INSTANCE_IMPORT
6329
6330     src_node = self.op.src_node
6331     src_path = self.op.src_path
6332
6333     if src_node is None:
6334       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6335       exp_list = self.rpc.call_export_list(locked_nodes)
6336       found = False
6337       for node in exp_list:
6338         if exp_list[node].fail_msg:
6339           continue
6340         if src_path in exp_list[node].payload:
6341           found = True
6342           self.op.src_node = src_node = node
6343           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6344                                                        src_path)
6345           break
6346       if not found:
6347         raise errors.OpPrereqError("No export found for relative path %s" %
6348                                     src_path, errors.ECODE_INVAL)
6349
6350     _CheckNodeOnline(self, src_node)
6351     result = self.rpc.call_export_info(src_node, src_path)
6352     result.Raise("No export or invalid export found in dir %s" % src_path)
6353
6354     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6355     if not export_info.has_section(constants.INISECT_EXP):
6356       raise errors.ProgrammerError("Corrupted export config",
6357                                    errors.ECODE_ENVIRON)
6358
6359     ei_version = export_info.get(constants.INISECT_EXP, "version")
6360     if (int(ei_version) != constants.EXPORT_VERSION):
6361       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6362                                  (ei_version, constants.EXPORT_VERSION),
6363                                  errors.ECODE_ENVIRON)
6364     return export_info
6365
6366   def _ReadExportParams(self, einfo):
6367     """Use export parameters as defaults.
6368
6369     In case the opcode doesn't specify (as in override) some instance
6370     parameters, then try to use them from the export information, if
6371     that declares them.
6372
6373     """
6374     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6375
6376     if self.op.disk_template is None:
6377       if einfo.has_option(constants.INISECT_INS, "disk_template"):
6378         self.op.disk_template = einfo.get(constants.INISECT_INS,
6379                                           "disk_template")
6380       else:
6381         raise errors.OpPrereqError("No disk template specified and the export"
6382                                    " is missing the disk_template information",
6383                                    errors.ECODE_INVAL)
6384
6385     if not self.op.disks:
6386       if einfo.has_option(constants.INISECT_INS, "disk_count"):
6387         disks = []
6388         # TODO: import the disk iv_name too
6389         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6390           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6391           disks.append({"size": disk_sz})
6392         self.op.disks = disks
6393       else:
6394         raise errors.OpPrereqError("No disk info specified and the export"
6395                                    " is missing the disk information",
6396                                    errors.ECODE_INVAL)
6397
6398     if (not self.op.nics and
6399         einfo.has_option(constants.INISECT_INS, "nic_count")):
6400       nics = []
6401       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6402         ndict = {}
6403         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6404           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6405           ndict[name] = v
6406         nics.append(ndict)
6407       self.op.nics = nics
6408
6409     if (self.op.hypervisor is None and
6410         einfo.has_option(constants.INISECT_INS, "hypervisor")):
6411       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6412     if einfo.has_section(constants.INISECT_HYP):
6413       # use the export parameters but do not override the ones
6414       # specified by the user
6415       for name, value in einfo.items(constants.INISECT_HYP):
6416         if name not in self.op.hvparams:
6417           self.op.hvparams[name] = value
6418
6419     if einfo.has_section(constants.INISECT_BEP):
6420       # use the parameters, without overriding
6421       for name, value in einfo.items(constants.INISECT_BEP):
6422         if name not in self.op.beparams:
6423           self.op.beparams[name] = value
6424     else:
6425       # try to read the parameters old style, from the main section
6426       for name in constants.BES_PARAMETERS:
6427         if (name not in self.op.beparams and
6428             einfo.has_option(constants.INISECT_INS, name)):
6429           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6430
6431   def _RevertToDefaults(self, cluster):
6432     """Revert the instance parameters to the default values.
6433
6434     """
6435     # hvparams
6436     hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6437     for name in self.op.hvparams.keys():
6438       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6439         del self.op.hvparams[name]
6440     # beparams
6441     be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6442     for name in self.op.beparams.keys():
6443       if name in be_defs and be_defs[name] == self.op.beparams[name]:
6444         del self.op.beparams[name]
6445     # nic params
6446     nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6447     for nic in self.op.nics:
6448       for name in constants.NICS_PARAMETERS:
6449         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6450           del nic[name]
6451
6452   def CheckPrereq(self):
6453     """Check prerequisites.
6454
6455     """
6456     if self.op.mode == constants.INSTANCE_IMPORT:
6457       export_info = self._ReadExportInfo()
6458       self._ReadExportParams(export_info)
6459
6460     _CheckDiskTemplate(self.op.disk_template)
6461
6462     if (not self.cfg.GetVGName() and
6463         self.op.disk_template not in constants.DTS_NOT_LVM):
6464       raise errors.OpPrereqError("Cluster does not support lvm-based"
6465                                  " instances", errors.ECODE_STATE)
6466
6467     if self.op.hypervisor is None:
6468       self.op.hypervisor = self.cfg.GetHypervisorType()
6469
6470     cluster = self.cfg.GetClusterInfo()
6471     enabled_hvs = cluster.enabled_hypervisors
6472     if self.op.hypervisor not in enabled_hvs:
6473       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6474                                  " cluster (%s)" % (self.op.hypervisor,
6475                                   ",".join(enabled_hvs)),
6476                                  errors.ECODE_STATE)
6477
6478     # check hypervisor parameter syntax (locally)
6479     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6480     filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6481                                                         self.op.os_type),
6482                                   self.op.hvparams)
6483     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6484     hv_type.CheckParameterSyntax(filled_hvp)
6485     self.hv_full = filled_hvp
6486     # check that we don't specify global parameters on an instance
6487     _CheckGlobalHvParams(self.op.hvparams)
6488
6489     # fill and remember the beparams dict
6490     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6491     self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6492                                     self.op.beparams)
6493
6494     # now that hvp/bep are in final format, let's reset to defaults,
6495     # if told to do so
6496     if self.op.identify_defaults:
6497       self._RevertToDefaults(cluster)
6498
6499     # NIC buildup
6500     self.nics = []
6501     for idx, nic in enumerate(self.op.nics):
6502       nic_mode_req = nic.get("mode", None)
6503       nic_mode = nic_mode_req
6504       if nic_mode is None:
6505         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6506
6507       # in routed mode, for the first nic, the default ip is 'auto'
6508       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6509         default_ip_mode = constants.VALUE_AUTO
6510       else:
6511         default_ip_mode = constants.VALUE_NONE
6512
6513       # ip validity checks
6514       ip = nic.get("ip", default_ip_mode)
6515       if ip is None or ip.lower() == constants.VALUE_NONE:
6516         nic_ip = None
6517       elif ip.lower() == constants.VALUE_AUTO:
6518         if not self.op.name_check:
6519           raise errors.OpPrereqError("IP address set to auto but name checks"
6520                                      " have been skipped. Aborting.",
6521                                      errors.ECODE_INVAL)
6522         nic_ip = self.hostname1.ip
6523       else:
6524         if not utils.IsValidIP(ip):
6525           raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6526                                      " like a valid IP" % ip,
6527                                      errors.ECODE_INVAL)
6528         nic_ip = ip
6529
6530       # TODO: check the ip address for uniqueness
6531       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6532         raise errors.OpPrereqError("Routed nic mode requires an ip address",
6533                                    errors.ECODE_INVAL)
6534
6535       # MAC address verification
6536       mac = nic.get("mac", constants.VALUE_AUTO)
6537       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6538         mac = utils.NormalizeAndValidateMac(mac)
6539
6540         try:
6541           self.cfg.ReserveMAC(mac, self.proc.GetECId())
6542         except errors.ReservationError:
6543           raise errors.OpPrereqError("MAC address %s already in use"
6544                                      " in cluster" % mac,
6545                                      errors.ECODE_NOTUNIQUE)
6546
6547       # bridge verification
6548       bridge = nic.get("bridge", None)
6549       link = nic.get("link", None)
6550       if bridge and link:
6551         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6552                                    " at the same time", errors.ECODE_INVAL)
6553       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6554         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6555                                    errors.ECODE_INVAL)
6556       elif bridge:
6557         link = bridge
6558
6559       nicparams = {}
6560       if nic_mode_req:
6561         nicparams[constants.NIC_MODE] = nic_mode_req
6562       if link:
6563         nicparams[constants.NIC_LINK] = link
6564
6565       check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6566                                       nicparams)
6567       objects.NIC.CheckParameterSyntax(check_params)
6568       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6569
6570     # disk checks/pre-build
6571     self.disks = []
6572     for disk in self.op.disks:
6573       mode = disk.get("mode", constants.DISK_RDWR)
6574       if mode not in constants.DISK_ACCESS_SET:
6575         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6576                                    mode, errors.ECODE_INVAL)
6577       size = disk.get("size", None)
6578       if size is None:
6579         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6580       try:
6581         size = int(size)
6582       except (TypeError, ValueError):
6583         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6584                                    errors.ECODE_INVAL)
6585       new_disk = {"size": size, "mode": mode}
6586       if "adopt" in disk:
6587         new_disk["adopt"] = disk["adopt"]
6588       self.disks.append(new_disk)
6589
6590     if self.op.mode == constants.INSTANCE_IMPORT:
6591
6592       # Check that the new instance doesn't have less disks than the export
6593       instance_disks = len(self.disks)
6594       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6595       if instance_disks < export_disks:
6596         raise errors.OpPrereqError("Not enough disks to import."
6597                                    " (instance: %d, export: %d)" %
6598                                    (instance_disks, export_disks),
6599                                    errors.ECODE_INVAL)
6600
6601       disk_images = []
6602       for idx in range(export_disks):
6603         option = 'disk%d_dump' % idx
6604         if export_info.has_option(constants.INISECT_INS, option):
6605           # FIXME: are the old os-es, disk sizes, etc. useful?
6606           export_name = export_info.get(constants.INISECT_INS, option)
6607           image = utils.PathJoin(self.op.src_path, export_name)
6608           disk_images.append(image)
6609         else:
6610           disk_images.append(False)
6611
6612       self.src_images = disk_images
6613
6614       old_name = export_info.get(constants.INISECT_INS, 'name')
6615       try:
6616         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6617       except (TypeError, ValueError), err:
6618         raise errors.OpPrereqError("Invalid export file, nic_count is not"
6619                                    " an integer: %s" % str(err),
6620                                    errors.ECODE_STATE)
6621       if self.op.instance_name == old_name:
6622         for idx, nic in enumerate(self.nics):
6623           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6624             nic_mac_ini = 'nic%d_mac' % idx
6625             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6626
6627     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6628
6629     # ip ping checks (we use the same ip that was resolved in ExpandNames)
6630     if self.op.ip_check:
6631       if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6632         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6633                                    (self.check_ip, self.op.instance_name),
6634                                    errors.ECODE_NOTUNIQUE)
6635
6636     #### mac address generation
6637     # By generating here the mac address both the allocator and the hooks get
6638     # the real final mac address rather than the 'auto' or 'generate' value.
6639     # There is a race condition between the generation and the instance object
6640     # creation, which means that we know the mac is valid now, but we're not
6641     # sure it will be when we actually add the instance. If things go bad
6642     # adding the instance will abort because of a duplicate mac, and the
6643     # creation job will fail.
6644     for nic in self.nics:
6645       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6646         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6647
6648     #### allocator run
6649
6650     if self.op.iallocator is not None:
6651       self._RunAllocator()
6652
6653     #### node related checks
6654
6655     # check primary node
6656     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6657     assert self.pnode is not None, \
6658       "Cannot retrieve locked node %s" % self.op.pnode
6659     if pnode.offline:
6660       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6661                                  pnode.name, errors.ECODE_STATE)
6662     if pnode.drained:
6663       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6664                                  pnode.name, errors.ECODE_STATE)
6665
6666     self.secondaries = []
6667
6668     # mirror node verification
6669     if self.op.disk_template in constants.DTS_NET_MIRROR:
6670       if self.op.snode is None:
6671         raise errors.OpPrereqError("The networked disk templates need"
6672                                    " a mirror node", errors.ECODE_INVAL)
6673       if self.op.snode == pnode.name:
6674         raise errors.OpPrereqError("The secondary node cannot be the"
6675                                    " primary node.", errors.ECODE_INVAL)
6676       _CheckNodeOnline(self, self.op.snode)
6677       _CheckNodeNotDrained(self, self.op.snode)
6678       self.secondaries.append(self.op.snode)
6679
6680     nodenames = [pnode.name] + self.secondaries
6681
6682     req_size = _ComputeDiskSize(self.op.disk_template,
6683                                 self.disks)
6684
6685     # Check lv size requirements, if not adopting
6686     if req_size is not None and not self.adopt_disks:
6687       _CheckNodesFreeDisk(self, nodenames, req_size)
6688
6689     if self.adopt_disks: # instead, we must check the adoption data
6690       all_lvs = set([i["adopt"] for i in self.disks])
6691       if len(all_lvs) != len(self.disks):
6692         raise errors.OpPrereqError("Duplicate volume names given for adoption",
6693                                    errors.ECODE_INVAL)
6694       for lv_name in all_lvs:
6695         try:
6696           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6697         except errors.ReservationError:
6698           raise errors.OpPrereqError("LV named %s used by another instance" %
6699                                      lv_name, errors.ECODE_NOTUNIQUE)
6700
6701       node_lvs = self.rpc.call_lv_list([pnode.name],
6702                                        self.cfg.GetVGName())[pnode.name]
6703       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6704       node_lvs = node_lvs.payload
6705       delta = all_lvs.difference(node_lvs.keys())
6706       if delta:
6707         raise errors.OpPrereqError("Missing logical volume(s): %s" %
6708                                    utils.CommaJoin(delta),
6709                                    errors.ECODE_INVAL)
6710       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6711       if online_lvs:
6712         raise errors.OpPrereqError("Online logical volumes found, cannot"
6713                                    " adopt: %s" % utils.CommaJoin(online_lvs),
6714                                    errors.ECODE_STATE)
6715       # update the size of disk based on what is found
6716       for dsk in self.disks:
6717         dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6718
6719     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6720
6721     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6722
6723     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6724
6725     # memory check on primary node
6726     if self.op.start:
6727       _CheckNodeFreeMemory(self, self.pnode.name,
6728                            "creating instance %s" % self.op.instance_name,
6729                            self.be_full[constants.BE_MEMORY],
6730                            self.op.hypervisor)
6731
6732     self.dry_run_result = list(nodenames)
6733
6734   def Exec(self, feedback_fn):
6735     """Create and add the instance to the cluster.
6736
6737     """
6738     instance = self.op.instance_name
6739     pnode_name = self.pnode.name
6740
6741     ht_kind = self.op.hypervisor
6742     if ht_kind in constants.HTS_REQ_PORT:
6743       network_port = self.cfg.AllocatePort()
6744     else:
6745       network_port = None
6746
6747     if constants.ENABLE_FILE_STORAGE:
6748       # this is needed because os.path.join does not accept None arguments
6749       if self.op.file_storage_dir is None:
6750         string_file_storage_dir = ""
6751       else:
6752         string_file_storage_dir = self.op.file_storage_dir
6753
6754       # build the full file storage dir path
6755       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6756                                         string_file_storage_dir, instance)
6757     else:
6758       file_storage_dir = ""
6759
6760
6761     disks = _GenerateDiskTemplate(self,
6762                                   self.op.disk_template,
6763                                   instance, pnode_name,
6764                                   self.secondaries,
6765                                   self.disks,
6766                                   file_storage_dir,
6767                                   self.op.file_driver,
6768                                   0)
6769
6770     iobj = objects.Instance(name=instance, os=self.op.os_type,
6771                             primary_node=pnode_name,
6772                             nics=self.nics, disks=disks,
6773                             disk_template=self.op.disk_template,
6774                             admin_up=False,
6775                             network_port=network_port,
6776                             beparams=self.op.beparams,
6777                             hvparams=self.op.hvparams,
6778                             hypervisor=self.op.hypervisor,
6779                             )
6780
6781     if self.adopt_disks:
6782       # rename LVs to the newly-generated names; we need to construct
6783       # 'fake' LV disks with the old data, plus the new unique_id
6784       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6785       rename_to = []
6786       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
6787         rename_to.append(t_dsk.logical_id)
6788         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6789         self.cfg.SetDiskID(t_dsk, pnode_name)
6790       result = self.rpc.call_blockdev_rename(pnode_name,
6791                                              zip(tmp_disks, rename_to))
6792       result.Raise("Failed to rename adoped LVs")
6793     else:
6794       feedback_fn("* creating instance disks...")
6795       try:
6796         _CreateDisks(self, iobj)
6797       except errors.OpExecError:
6798         self.LogWarning("Device creation failed, reverting...")
6799         try:
6800           _RemoveDisks(self, iobj)
6801         finally:
6802           self.cfg.ReleaseDRBDMinors(instance)
6803           raise
6804
6805     feedback_fn("adding instance %s to cluster config" % instance)
6806
6807     self.cfg.AddInstance(iobj, self.proc.GetECId())
6808
6809     # Declare that we don't want to remove the instance lock anymore, as we've
6810     # added the instance to the config
6811     del self.remove_locks[locking.LEVEL_INSTANCE]
6812     # Unlock all the nodes
6813     if self.op.mode == constants.INSTANCE_IMPORT:
6814       nodes_keep = [self.op.src_node]
6815       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6816                        if node != self.op.src_node]
6817       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6818       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6819     else:
6820       self.context.glm.release(locking.LEVEL_NODE)
6821       del self.acquired_locks[locking.LEVEL_NODE]
6822
6823     if self.op.wait_for_sync:
6824       disk_abort = not _WaitForSync(self, iobj)
6825     elif iobj.disk_template in constants.DTS_NET_MIRROR:
6826       # make sure the disks are not degraded (still sync-ing is ok)
6827       time.sleep(15)
6828       feedback_fn("* checking mirrors status")
6829       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6830     else:
6831       disk_abort = False
6832
6833     if disk_abort:
6834       _RemoveDisks(self, iobj)
6835       self.cfg.RemoveInstance(iobj.name)
6836       # Make sure the instance lock gets removed
6837       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6838       raise errors.OpExecError("There are some degraded disks for"
6839                                " this instance")
6840
6841     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6842       if self.op.mode == constants.INSTANCE_CREATE:
6843         if not self.op.no_install:
6844           feedback_fn("* running the instance OS create scripts...")
6845           # FIXME: pass debug option from opcode to backend
6846           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6847                                                  self.op.debug_level)
6848           result.Raise("Could not add os for instance %s"
6849                        " on node %s" % (instance, pnode_name))
6850
6851       elif self.op.mode == constants.INSTANCE_IMPORT:
6852         feedback_fn("* running the instance OS import scripts...")
6853         src_node = self.op.src_node
6854         src_images = self.src_images
6855         cluster_name = self.cfg.GetClusterName()
6856         # FIXME: pass debug option from opcode to backend
6857         import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6858                                                          src_node, src_images,
6859                                                          cluster_name,
6860                                                          self.op.debug_level)
6861         msg = import_result.fail_msg
6862         if msg:
6863           self.LogWarning("Error while importing the disk images for instance"
6864                           " %s on node %s: %s" % (instance, pnode_name, msg))
6865       else:
6866         # also checked in the prereq part
6867         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6868                                      % self.op.mode)
6869
6870     if self.op.start:
6871       iobj.admin_up = True
6872       self.cfg.Update(iobj, feedback_fn)
6873       logging.info("Starting instance %s on node %s", instance, pnode_name)
6874       feedback_fn("* starting instance...")
6875       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6876       result.Raise("Could not start instance")
6877
6878     return list(iobj.all_nodes)
6879
6880
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  Unlike most logical units, this one does not change anything: its
  result is the command line which has to be run on the master node in
  order to reach the console of the instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to C{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This looks up the instance in the configuration and runs
    C{_CheckNodeOnline} on its primary node.

    """
    inst_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(inst_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % inst_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Build the command used to connect to the console of an instance.

    @param feedback_fn: not used by this LU
    @return: the value of C{self.ssh.BuildCmd} for the hypervisor's
        console command, to be run as root on the primary node
    @raise errors.OpExecError: if the instance is not reported by its
        primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    # ask the primary node which instances it has for this hypervisor
    running = self.rpc.call_instance_list([pnode], [inst.hypervisor])[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name not in running.payload:
      raise errors.OpExecError("Instance %s is not running." % inst.name)

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    hv_class = hypervisor.GetHypervisor(inst.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # the filled beparams and hvparams are handed over as separate
    # arguments, so that the instance object is not modified (and the
    # defaults do not end up saved in the instance itself)
    console_cmd = hv_class.GetShellCommandForConsole(inst,
                                                     cluster.FillHV(inst),
                                                     cluster.FillBE(inst))

    # build ssh cmdline
    return self.ssh.BuildCmd(pnode, "root", console_cmd, batch=True, tty=True)
6932
6933
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  The actual work is delegated to a L{TLReplaceDisks} tasklet, which is
  created in L{ExpandNames}.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Set defaults for the optional parameters and validate them.

    The validation of the mode/remote node/iallocator combination is
    done by L{TLReplaceDisks.CheckArguments}.

    """
    optional_params = [
      ("remote_node", None),
      ("iallocator", None),
      ("early_release", False),
      ]
    for attr_name, default in optional_params:
      if not hasattr(self.op, attr_name):
        setattr(self.op, attr_name, default)

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create the replace-disks tasklet.

    """
    self._ExpandAndLockInstance()

    opcode = self.op
    if opcode.iallocator is not None:
      node_locks = locking.ALL_SET

    elif opcode.remote_node is not None:
      opcode.remote_node = _ExpandNodeName(self.cfg, opcode.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      node_locks = [opcode.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      node_locks = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.needed_locks[locking.LEVEL_NODE] = node_locks

    self.replacer = TLReplaceDisks(self, opcode.instance_name, opcode.mode,
                                   opcode.iallocator, opcode.remote_node,
                                   opcode.disks, False, opcode.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Declare the locks of the instance's nodes at node level.

    Nothing is done if all nodes are already being locked.

    @param level: the locking level for which locks are declared

    """
    if level != locking.LEVEL_NODE:
      return
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list contains the master, the primary node of the instance
    and, if one was given, the new secondary node; the same list is
    returned twice.

    @return: tuple of (environment dict, node list, node list)

    """
    inst = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": inst.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]
    if self.op.remote_node is not None:
      hook_nodes.append(self.op.remote_node)

    return env, hook_nodes, hook_nodes
7008
7009
class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  One L{TLReplaceDisks} tasklet, in "change secondary" mode, is created
  for every instance returned by C{_GetNodeSecondaryInstances} for the
  node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Set defaults for the optional parameters and validate them.

    The remote node/iallocator combination is validated by
    L{TLReplaceDisks.CheckArguments}, always using the
    C{constants.REPLACE_DISK_CHG} mode.

    """
    optional_params = [
      ("remote_node", None),
      ("iallocator", None),
      ("early_release", False),
      ]
    for attr_name, default in optional_params:
      if not hasattr(self.op, attr_name):
        setattr(self.op, attr_name, default)

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create one tasklet per instance.

    @raise errors.OpPrereqError: if neither an iallocator nor a remote
        node was given

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is None and self.op.remote_node is None:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    inst_names = []
    replacers = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      inst_names.append(inst.name)
      replacers.append(TLReplaceDisks(self, inst.name,
                                      constants.REPLACE_DISK_CHG,
                                      self.op.iallocator,
                                      self.op.remote_node, [],
                                      True, self.op.early_release))

    self.tasklets = replacers
    self.instance_names = inst_names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    """Declare the locks of the instances' nodes at node level.

    Nothing is done if all nodes are already being locked.

    @param level: the locking level for which locks are declared

    """
    if level != locking.LEVEL_NODE:
      return
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list contains the master and, if one was given, the new
    secondary node; the same list is returned twice.

    @return: tuple of (environment dict, node list, node list)

    """
    hook_env = {
      "NODE_NAME": self.op.node_name,
      }
    hook_nodes = [self.cfg.GetMasterNode()]

    new_secondary = self.op.remote_node
    if new_secondary is not None:
      hook_env["NEW_SECONDARY"] = new_secondary
      hook_nodes.append(new_secondary)

    return (hook_env, hook_nodes, hook_nodes)
7097
7098
7099 class TLReplaceDisks(Tasklet):
7100   """Replaces disks for an instance.
7101
7102   Note: Locking is not within the scope of this class.
7103
7104   """
7105   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7106                disks, delay_iallocator, early_release):
7107     """Initializes this class.
7108
7109     """
7110     Tasklet.__init__(self, lu)
7111
7112     # Parameters
7113     self.instance_name = instance_name
7114     self.mode = mode
7115     self.iallocator_name = iallocator_name
7116     self.remote_node = remote_node
7117     self.disks = disks
7118     self.delay_iallocator = delay_iallocator
7119     self.early_release = early_release
7120
7121     # Runtime data
7122     self.instance = None
7123     self.new_node = None
7124     self.target_node = None
7125     self.other_node = None
7126     self.remote_node_info = None
7127     self.node_secondary_ip = None
7128
7129   @staticmethod
7130   def CheckArguments(mode, remote_node, iallocator):
7131     """Helper function for users of this class.
7132
7133     """
7134     # check for valid parameter combination
7135     if mode == constants.REPLACE_DISK_CHG:
7136       if remote_node is None and iallocator is None:
7137         raise errors.OpPrereqError("When changing the secondary either an"
7138                                    " iallocator script must be used or the"
7139                                    " new node given", errors.ECODE_INVAL)
7140
7141       if remote_node is not None and iallocator is not None:
7142         raise errors.OpPrereqError("Give either the iallocator or the new"
7143                                    " secondary, not both", errors.ECODE_INVAL)
7144
7145     elif remote_node is not None or iallocator is not None:
7146       # Not replacing the secondary
7147       raise errors.OpPrereqError("The iallocator and new node options can"
7148                                  " only be used when changing the"
7149                                  " secondary node", errors.ECODE_INVAL)
7150
7151   @staticmethod
7152   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7153     """Compute a new secondary node using an IAllocator.
7154
7155     """
7156     ial = IAllocator(lu.cfg, lu.rpc,
7157                      mode=constants.IALLOCATOR_MODE_RELOC,
7158                      name=instance_name,
7159                      relocate_from=relocate_from)
7160
7161     ial.Run(iallocator_name)
7162
7163     if not ial.success:
7164       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7165                                  " %s" % (iallocator_name, ial.info),
7166                                  errors.ECODE_NORES)
7167
7168     if len(ial.result) != ial.required_nodes:
7169       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7170                                  " of nodes (%s), required %s" %
7171                                  (iallocator_name,
7172                                   len(ial.result), ial.required_nodes),
7173                                  errors.ECODE_FAULT)
7174
7175     remote_node_name = ial.result[0]
7176
7177     lu.LogInfo("Selected new secondary for instance '%s': %s",
7178                instance_name, remote_node_name)
7179
7180     return remote_node_name
7181
7182   def _FindFaultyDisks(self, node_name):
7183     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7184                                     node_name, True)
7185
7186   def CheckPrereq(self):
7187     """Check prerequisites.
7188
7189     This checks that the instance is in the cluster.
7190
7191     """
7192     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7193     assert instance is not None, \
7194       "Cannot retrieve locked instance %s" % self.instance_name
7195
7196     if instance.disk_template != constants.DT_DRBD8:
7197       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7198                                  " instances", errors.ECODE_INVAL)
7199
7200     if len(instance.secondary_nodes) != 1:
7201       raise errors.OpPrereqError("The instance has a strange layout,"
7202                                  " expected one secondary but found %d" %
7203                                  len(instance.secondary_nodes),
7204                                  errors.ECODE_FAULT)
7205
7206     if not self.delay_iallocator:
7207       self._CheckPrereq2()
7208
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    Besides validating the request, this method fills in the attributes used
    by the execution code: C{self.remote_node_info}, C{self.disks},
    C{self.target_node}, C{self.other_node}, C{self.new_node} (only in
    change-secondary mode) and C{self.node_secondary_ip}. It may also switch
    C{self.early_release} on when the old secondary is offline.

    @raise errors.OpPrereqError: if the new node equals the current primary
        or secondary, if disks were specified in automatic or
        change-secondary mode, or if both nodes have faulty disks in
        automatic mode
    @raise errors.ProgrammerError: if C{self.mode} is not a handled mode

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # The new secondary is either the one given by the caller (possibly
    # None) or the one computed by the iallocator
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    # An explicit disk list is rejected in automatic and change-secondary
    # modes
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      # Automatic mode: the node holding the faulty disks becomes the target
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty: no disks to replace and no nodes to check;
        # target_node and other_node are not assigned in this branch
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        # Only the new node and the primary are checked for being online;
        # the old secondary is not part of this list
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    # (the return value of FindDisk is discarded; the call is made only for
    # its validation - presumably it raises on an invalid index, TODO confirm)
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    # NOTE(review): this reads target_node, other_node and new_node even in
    # the branches above that do not assign them - presumably they are
    # initialised elsewhere (e.g. in the constructor); confirm
    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip
7323
7324   def Exec(self, feedback_fn):
7325     """Execute disk replacement.
7326
7327     This dispatches the disk replacement to the appropriate handler.
7328
7329     """
7330     if self.delay_iallocator:
7331       self._CheckPrereq2()
7332
7333     if not self.disks:
7334       feedback_fn("No disks need replacement")
7335       return
7336
7337     feedback_fn("Replacing disk(s) %s for %s" %
7338                 (utils.CommaJoin(self.disks), self.instance.name))
7339
7340     activate_disks = (not self.instance.admin_up)
7341
7342     # Activate the instance disks if we're replacing them on a down instance
7343     if activate_disks:
7344       _StartInstanceDisks(self.lu, self.instance, True)
7345
7346     try:
7347       # Should we replace the secondary node?
7348       if self.new_node is not None:
7349         fn = self._ExecDrbd8Secondary
7350       else:
7351         fn = self._ExecDrbd8DiskOnly
7352
7353       return fn(feedback_fn)
7354
7355     finally:
7356       # Deactivate the instance disks if we're replacing them on a
7357       # down instance
7358       if activate_disks:
7359         _SafeShutdownInstanceDisks(self.lu, self.instance)
7360
7361   def _CheckVolumeGroup(self, nodes):
7362     self.lu.LogInfo("Checking volume groups")
7363
7364     vgname = self.cfg.GetVGName()
7365
7366     # Make sure volume group exists on all involved nodes
7367     results = self.rpc.call_vg_list(nodes)
7368     if not results:
7369       raise errors.OpExecError("Can't list volume groups on the nodes")
7370
7371     for node in nodes:
7372       res = results[node]
7373       res.Raise("Error checking node %s" % node)
7374       if vgname not in res.payload:
7375         raise errors.OpExecError("Volume group '%s' not found on node %s" %
7376                                  (vgname, node))
7377
7378   def _CheckDisksExistence(self, nodes):
7379     # Check disk existence
7380     for idx, dev in enumerate(self.instance.disks):
7381       if idx not in self.disks:
7382         continue
7383
7384       for node in nodes:
7385         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7386         self.cfg.SetDiskID(dev, node)
7387
7388         result = self.rpc.call_blockdev_find(node, dev)
7389
7390         msg = result.fail_msg
7391         if msg or not result.payload:
7392           if not msg:
7393             msg = "disk not found"
7394           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7395                                    (idx, node, msg))
7396
7397   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7398     for idx, dev in enumerate(self.instance.disks):
7399       if idx not in self.disks:
7400         continue
7401
7402       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7403                       (idx, node_name))
7404
7405       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7406                                    ldisk=ldisk):
7407         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7408                                  " replace disks for instance %s" %
7409                                  (node_name, self.instance.name))
7410
7411   def _CreateNewStorage(self, node_name):
7412     vgname = self.cfg.GetVGName()
7413     iv_names = {}
7414
7415     for idx, dev in enumerate(self.instance.disks):
7416       if idx not in self.disks:
7417         continue
7418
7419       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7420
7421       self.cfg.SetDiskID(dev, node_name)
7422
7423       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7424       names = _GenerateUniqueNames(self.lu, lv_names)
7425
7426       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7427                              logical_id=(vgname, names[0]))
7428       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7429                              logical_id=(vgname, names[1]))
7430
7431       new_lvs = [lv_data, lv_meta]
7432       old_lvs = dev.children
7433       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7434
7435       # we pass force_create=True to force the LVM creation
7436       for new_lv in new_lvs:
7437         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7438                         _GetInstanceInfoText(self.instance), False)
7439
7440     return iv_names
7441
7442   def _CheckDevices(self, node_name, iv_names):
7443     for name, (dev, _, _) in iv_names.iteritems():
7444       self.cfg.SetDiskID(dev, node_name)
7445
7446       result = self.rpc.call_blockdev_find(node_name, dev)
7447
7448       msg = result.fail_msg
7449       if msg or not result.payload:
7450         if not msg:
7451           msg = "disk not found"
7452         raise errors.OpExecError("Can't find DRBD device %s: %s" %
7453                                  (name, msg))
7454
7455       if result.payload.is_degraded:
7456         raise errors.OpExecError("DRBD device %s is degraded!" % name)
7457
7458   def _RemoveOldStorage(self, node_name, iv_names):
7459     for name, (_, old_lvs, _) in iv_names.iteritems():
7460       self.lu.LogInfo("Remove logical volumes for %s" % name)
7461
7462       for lv in old_lvs:
7463         self.cfg.SetDiskID(lv, node_name)
7464
7465         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7466         if msg:
7467           self.lu.LogWarning("Can't remove old LV: %s" % msg,
7468                              hint="remove unused LVs manually")
7469
7470   def _ReleaseNodeLock(self, node_name):
7471     """Releases the lock for a given node."""
7472     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7473
7474   def _ExecDrbd8DiskOnly(self, feedback_fn):
7475     """Replace a disk on the primary or secondary for DRBD 8.
7476
7477     The algorithm for replace is quite complicated:
7478
7479       1. for each disk to be replaced:
7480
7481         1. create new LVs on the target node with unique names
7482         1. detach old LVs from the drbd device
7483         1. rename old LVs to name_replaced.<time_t>
7484         1. rename new LVs to old LVs
7485         1. attach the new LVs (with the old names now) to the drbd device
7486
7487       1. wait for sync across all devices
7488
7489       1. for each modified disk:
7490
7491         1. remove old LVs (which have the name name_replaces.<time_t>)
7492
7493     Failures are not very well handled.
7494
7495     """
7496     steps_total = 6
7497
7498     # Step: check device activation
7499     self.lu.LogStep(1, steps_total, "Check device existence")
7500     self._CheckDisksExistence([self.other_node, self.target_node])
7501     self._CheckVolumeGroup([self.target_node, self.other_node])
7502
7503     # Step: check other node consistency
7504     self.lu.LogStep(2, steps_total, "Check peer consistency")
7505     self._CheckDisksConsistency(self.other_node,
7506                                 self.other_node == self.instance.primary_node,
7507                                 False)
7508
7509     # Step: create new storage
7510     self.lu.LogStep(3, steps_total, "Allocate new storage")
7511     iv_names = self._CreateNewStorage(self.target_node)
7512
7513     # Step: for each lv, detach+rename*2+attach
7514     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7515     for dev, old_lvs, new_lvs in iv_names.itervalues():
7516       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7517
7518       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7519                                                      old_lvs)
7520       result.Raise("Can't detach drbd from local storage on node"
7521                    " %s for device %s" % (self.target_node, dev.iv_name))
7522       #dev.children = []
7523       #cfg.Update(instance)
7524
7525       # ok, we created the new LVs, so now we know we have the needed
7526       # storage; as such, we proceed on the target node to rename
7527       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7528       # using the assumption that logical_id == physical_id (which in
7529       # turn is the unique_id on that node)
7530
7531       # FIXME(iustin): use a better name for the replaced LVs
7532       temp_suffix = int(time.time())
7533       ren_fn = lambda d, suff: (d.physical_id[0],
7534                                 d.physical_id[1] + "_replaced-%s" % suff)
7535
7536       # Build the rename list based on what LVs exist on the node
7537       rename_old_to_new = []
7538       for to_ren in old_lvs:
7539         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7540         if not result.fail_msg and result.payload:
7541           # device exists
7542           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7543
7544       self.lu.LogInfo("Renaming the old LVs on the target node")
7545       result = self.rpc.call_blockdev_rename(self.target_node,
7546                                              rename_old_to_new)
7547       result.Raise("Can't rename old LVs on node %s" % self.target_node)
7548
7549       # Now we rename the new LVs to the old LVs
7550       self.lu.LogInfo("Renaming the new LVs on the target node")
7551       rename_new_to_old = [(new, old.physical_id)
7552                            for old, new in zip(old_lvs, new_lvs)]
7553       result = self.rpc.call_blockdev_rename(self.target_node,
7554                                              rename_new_to_old)
7555       result.Raise("Can't rename new LVs on node %s" % self.target_node)
7556
7557       for old, new in zip(old_lvs, new_lvs):
7558         new.logical_id = old.logical_id
7559         self.cfg.SetDiskID(new, self.target_node)
7560
7561       for disk in old_lvs:
7562         disk.logical_id = ren_fn(disk, temp_suffix)
7563         self.cfg.SetDiskID(disk, self.target_node)
7564
7565       # Now that the new lvs have the old name, we can add them to the device
7566       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7567       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7568                                                   new_lvs)
7569       msg = result.fail_msg
7570       if msg:
7571         for new_lv in new_lvs:
7572           msg2 = self.rpc.call_blockdev_remove(self.target_node,
7573                                                new_lv).fail_msg
7574           if msg2:
7575             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7576                                hint=("cleanup manually the unused logical"
7577                                      "volumes"))
7578         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7579
7580       dev.children = new_lvs
7581
7582       self.cfg.Update(self.instance, feedback_fn)
7583
7584     cstep = 5
7585     if self.early_release:
7586       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7587       cstep += 1
7588       self._RemoveOldStorage(self.target_node, iv_names)
7589       # WARNING: we release both node locks here, do not do other RPCs
7590       # than WaitForSync to the primary node
7591       self._ReleaseNodeLock([self.target_node, self.other_node])
7592
7593     # Wait for sync
7594     # This can fail as the old devices are degraded and _WaitForSync
7595     # does a combined result over all disks, so we don't check its return value
7596     self.lu.LogStep(cstep, steps_total, "Sync devices")
7597     cstep += 1
7598     _WaitForSync(self.lu, self.instance)
7599
7600     # Check all devices manually
7601     self._CheckDevices(self.instance.primary_node, iv_names)
7602
7603     # Step: remove old storage
7604     if not self.early_release:
7605       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7606       cstep += 1
7607       self._RemoveOldStorage(self.target_node, iv_names)
7608
7609   def _ExecDrbd8Secondary(self, feedback_fn):
7610     """Replace the secondary node for DRBD 8.
7611
7612     The algorithm for replace is quite complicated:
7613       - for all disks of the instance:
7614         - create new LVs on the new node with same names
7615         - shutdown the drbd device on the old secondary
7616         - disconnect the drbd network on the primary
7617         - create the drbd device on the new secondary
7618         - network attach the drbd on the primary, using an artifice:
7619           the drbd code for Attach() will connect to the network if it
7620           finds a device which is connected to the good local disks but
7621           not network enabled
7622       - wait for sync across all devices
7623       - remove all disks from the old secondary
7624
7625     Failures are not very well handled.
7626
7627     """
7628     steps_total = 6
7629
7630     # Step: check device activation
7631     self.lu.LogStep(1, steps_total, "Check device existence")
7632     self._CheckDisksExistence([self.instance.primary_node])
7633     self._CheckVolumeGroup([self.instance.primary_node])
7634
7635     # Step: check other node consistency
7636     self.lu.LogStep(2, steps_total, "Check peer consistency")
7637     self._CheckDisksConsistency(self.instance.primary_node, True, True)
7638
7639     # Step: create new storage
7640     self.lu.LogStep(3, steps_total, "Allocate new storage")
7641     for idx, dev in enumerate(self.instance.disks):
7642       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7643                       (self.new_node, idx))
7644       # we pass force_create=True to force LVM creation
7645       for new_lv in dev.children:
7646         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7647                         _GetInstanceInfoText(self.instance), False)
7648
7649     # Step 4: dbrd minors and drbd setups changes
7650     # after this, we must manually remove the drbd minors on both the
7651     # error and the success paths
7652     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7653     minors = self.cfg.AllocateDRBDMinor([self.new_node
7654                                          for dev in self.instance.disks],
7655                                         self.instance.name)
7656     logging.debug("Allocated minors %r", minors)
7657
7658     iv_names = {}
7659     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7660       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7661                       (self.new_node, idx))
7662       # create new devices on new_node; note that we create two IDs:
7663       # one without port, so the drbd will be activated without
7664       # networking information on the new node at this stage, and one
7665       # with network, for the latter activation in step 4
7666       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7667       if self.instance.primary_node == o_node1:
7668         p_minor = o_minor1
7669       else:
7670         assert self.instance.primary_node == o_node2, "Three-node instance?"
7671         p_minor = o_minor2
7672
7673       new_alone_id = (self.instance.primary_node, self.new_node, None,
7674                       p_minor, new_minor, o_secret)
7675       new_net_id = (self.instance.primary_node, self.new_node, o_port,
7676                     p_minor, new_minor, o_secret)
7677
7678       iv_names[idx] = (dev, dev.children, new_net_id)
7679       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7680                     new_net_id)
7681       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7682                               logical_id=new_alone_id,
7683                               children=dev.children,
7684                               size=dev.size)
7685       try:
7686         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7687                               _GetInstanceInfoText(self.instance), False)
7688       except errors.GenericError:
7689         self.cfg.ReleaseDRBDMinors(self.instance.name)
7690         raise
7691
7692     # We have new devices, shutdown the drbd on the old secondary
7693     for idx, dev in enumerate(self.instance.disks):
7694       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7695       self.cfg.SetDiskID(dev, self.target_node)
7696       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7697       if msg:
7698         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7699                            "node: %s" % (idx, msg),
7700                            hint=("Please cleanup this device manually as"
7701                                  " soon as possible"))
7702
7703     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7704     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7705                                                self.node_secondary_ip,
7706                                                self.instance.disks)\
7707                                               [self.instance.primary_node]
7708
7709     msg = result.fail_msg
7710     if msg:
7711       # detaches didn't succeed (unlikely)
7712       self.cfg.ReleaseDRBDMinors(self.instance.name)
7713       raise errors.OpExecError("Can't detach the disks from the network on"
7714                                " old node: %s" % (msg,))
7715
7716     # if we managed to detach at least one, we update all the disks of
7717     # the instance to point to the new secondary
7718     self.lu.LogInfo("Updating instance configuration")
7719     for dev, _, new_logical_id in iv_names.itervalues():
7720       dev.logical_id = new_logical_id
7721       self.cfg.SetDiskID(dev, self.instance.primary_node)
7722
7723     self.cfg.Update(self.instance, feedback_fn)
7724
7725     # and now perform the drbd attach
7726     self.lu.LogInfo("Attaching primary drbds to new secondary"
7727                     " (standalone => connected)")
7728     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7729                                             self.new_node],
7730                                            self.node_secondary_ip,
7731                                            self.instance.disks,
7732                                            self.instance.name,
7733                                            False)
7734     for to_node, to_result in result.items():
7735       msg = to_result.fail_msg
7736       if msg:
7737         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7738                            to_node, msg,
7739                            hint=("please do a gnt-instance info to see the"
7740                                  " status of disks"))
7741     cstep = 5
7742     if self.early_release:
7743       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7744       cstep += 1
7745       self._RemoveOldStorage(self.target_node, iv_names)
7746       # WARNING: we release all node locks here, do not do other RPCs
7747       # than WaitForSync to the primary node
7748       self._ReleaseNodeLock([self.instance.primary_node,
7749                              self.target_node,
7750                              self.new_node])
7751
7752     # Wait for sync
7753     # This can fail as the old devices are degraded and _WaitForSync
7754     # does a combined result over all disks, so we don't check its return value
7755     self.lu.LogStep(cstep, steps_total, "Sync devices")
7756     cstep += 1
7757     _WaitForSync(self.lu, self.instance)
7758
7759     # Check all devices manually
7760     self._CheckDevices(self.instance.primary_node, iv_names)
7761
7762     # Step: remove old storage
7763     if not self.early_release:
7764       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7765       self._RemoveOldStorage(self.target_node, iv_names)
7766
7767
7768 class LURepairNodeStorage(NoHooksLU):
7769   """Repairs the volume group on a node.
7770
7771   """
7772   _OP_REQP = ["node_name"]
7773   REQ_BGL = False
7774
7775   def CheckArguments(self):
7776     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7777
7778     _CheckStorageType(self.op.storage_type)
7779
7780   def ExpandNames(self):
7781     self.needed_locks = {
7782       locking.LEVEL_NODE: [self.op.node_name],
7783       }
7784
7785   def _CheckFaultyDisks(self, instance, node_name):
7786     """Ensure faulty disks abort the opcode or at least warn."""
7787     try:
7788       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7789                                   node_name, True):
7790         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7791                                    " node '%s'" % (instance.name, node_name),
7792                                    errors.ECODE_STATE)
7793     except errors.OpPrereqError, err:
7794       if self.op.ignore_consistency:
7795         self.proc.LogWarning(str(err.args[0]))
7796       else:
7797         raise
7798
7799   def CheckPrereq(self):
7800     """Check prerequisites.
7801
7802     """
7803     storage_type = self.op.storage_type
7804
7805     if (constants.SO_FIX_CONSISTENCY not in
7806         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7807       raise errors.OpPrereqError("Storage units of type '%s' can not be"
7808                                  " repaired" % storage_type,
7809                                  errors.ECODE_INVAL)
7810
7811     # Check whether any instance on this node has faulty disks
7812     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7813       if not inst.admin_up:
7814         continue
7815       check_nodes = set(inst.all_nodes)
7816       check_nodes.discard(self.op.node_name)
7817       for inst_node_name in check_nodes:
7818         self._CheckFaultyDisks(inst, inst_node_name)
7819
7820   def Exec(self, feedback_fn):
7821     feedback_fn("Repairing storage unit '%s' on %s ..." %
7822                 (self.op.name, self.op.node_name))
7823
7824     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7825     result = self.rpc.call_storage_execute(self.op.node_name,
7826                                            self.op.storage_type, st_args,
7827                                            self.op.name,
7828                                            constants.SO_FIX_CONSISTENCY)
7829     result.Raise("Failed to repair storage unit '%s' on %s" %
7830                  (self.op.name, self.op.node_name))
7831
7832
class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  The result is computed either for an explicitly given new secondary node
  or by running an iallocator script in multi-evacuate mode.

  """
  _OP_REQP = ["nodes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional parameters and reject conflicting ones.

    @raise errors.OpPrereqError: if both a remote node and an iallocator
        are given

    """
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if self.op.remote_node is not None and self.op.iallocator is not None:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    """Expand the node names and declare the node locks.

    Without an explicit remote node all nodes are locked; otherwise only
    the evacuated nodes and the remote node are.

    """
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def CheckPrereq(self):
    """No prerequisites are checked for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the evacuation solution.

    @param feedback_fn: feedback function (not used here)
    @return: with a remote node, a list of C{[instance_name, remote_node]}
        pairs for the secondary instances of the evacuated nodes; otherwise
        the result of the iallocator run
    @raise errors.OpPrereqError: if the remote node is the primary node of
        one of the affected instances
    @raise errors.OpExecError: if the iallocator finds no solution

    """
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        # Only the message is passed: an extra error-code argument would
        # make str() of the exception render as a tuple instead of the
        # message
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info)
      result = ial.result
    return result
7885
7886
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  The selected disk is grown by the requested amount on every node of the
  instance and the new size is then recorded in the configuration.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are filled in by DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Add the instance's nodes when the node lock level is declared.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    @rtype: tuple
    @return: (environment dict, pre-hook nodes, post-hook nodes); the same
        node list is used for both phases

    """
    env = dict(DISK=self.op.disk, AMOUNT=self.op.amount)
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes += list(self.instance.all_nodes)
    return (env, hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that all its nodes are
    online, that its disk template can be grown and (except for file-based
    disks) that the nodes have enough free disk space.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    inst_nodes = list(inst.all_nodes)
    for node_name in inst_nodes:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    if inst.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    if inst.disk_template == constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      return

    _CheckNodesFreeDisk(self, inst_nodes, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    Activates the disk, asks every node of the instance to grow it, records
    the new size in the configuration and optionally waits for the resync.

    """
    inst = self.instance
    target = self.disk

    (activated, _) = _AssembleInstanceDisks(self, self.instance,
                                            disks=[target])
    if not activated:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node_name in inst.all_nodes:
      self.cfg.SetDiskID(target, node_name)
      grow_res = self.rpc.call_blockdev_grow(node_name, target,
                                             self.op.amount)
      grow_res.Raise("Grow request failed to node %s" % node_name)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    target.RecordGrow(self.op.amount)
    self.cfg.Update(inst, feedback_fn)

    if not self.op.wait_for_sync:
      # without waiting for the sync the disk is left active, even for an
      # instance that is marked down; only tell the user about it
      if not inst.admin_up:
        self.proc.LogWarning("Not shutting down the disk even if the instance"
                             " is not supposed to be running because no wait"
                             " for sync mode was requested.")
      return

    if not _WaitForSync(self, inst, disks=[target]):
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")
    if not inst.admin_up:
      _SafeShutdownInstanceDisks(self, inst, disks=[target])
7982
7983
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  Returns, per instance, a dictionary with its configuration and (unless a
  static query was requested) its run state and block device status.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the requested instance names and declare shared locks.

    An empty instance list means all instances are locked and queried.

    """
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if not self.op.instances:
      # the actual names are only known once the locks have been acquired
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
    else:
      self.wanted_names = []
      for short_name in self.op.instances:
        self.wanted_names.append(_ExpandInstanceName(self.cfg, short_name))
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Add the nodes of the locked instances at the node lock level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only resolves the (optional) instance list into the instance
    objects stored in the configuration.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    found = []
    for inst_name in self.wanted_names:
      found.append(self.cfg.GetInstanceInfo(inst_name))
    self.wanted_instances = found

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    @return: None for static queries, when no node is given, when the node
        is offline or when the node reports no status; otherwise a tuple of
        (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
        ldisk_status)

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    find_res = self.rpc.call_blockdev_find(node, dev)
    if find_res.offline:
      return None

    find_res.Raise("Can't compute disk status for %s" % instance_name)

    bdev = find_res.payload
    if bdev is None:
      return None

    return (bdev.dev_path, bdev.major, bdev.minor,
            bdev.sync_percent, bdev.estimated_time,
            bdev.is_degraded, bdev.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    The status is computed recursively for the device and all its children.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in): it is
      # whichever of the first two logical_id entries is not the primary
      first_node = dev.logical_id[0]
      if first_node == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = first_node

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    children_status = []
    for child in dev.children or []:
      children_status.append(self._ComputeDiskStatus(instance, snode, child))

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": children_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data

    @rtype: dict
    @return: dictionary mapping instance names to their data dictionaries

    """
    cluster = self.cfg.GetClusterInfo()
    all_data = {}

    for inst in self.wanted_instances:
      if self.op.static:
        run_state = None
      else:
        info_res = self.rpc.call_instance_info(inst.primary_node,
                                               inst.name,
                                               inst.hypervisor)
        info_res.Raise("Error checking node %s" % inst.primary_node)
        live_info = info_res.payload
        if live_info and "state" in live_info:
          run_state = "up"
        else:
          run_state = "down"

      config_state = "down"
      if inst.admin_up:
        config_state = "up"

      disk_data = []
      for device in inst.disks:
        disk_data.append(self._ComputeDiskStatus(inst, None, device))

      all_data[inst.name] = {
        "name": inst.name,
        "config_state": config_state,
        "run_state": run_state,
        "pnode": inst.primary_node,
        "snodes": inst.secondary_nodes,
        "os": inst.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, inst.nics),
        "disk_template": inst.disk_template,
        "disks": disk_data,
        "hypervisor": inst.hypervisor,
        "network_port": inst.network_port,
        "hv_instance": inst.hvparams,
        "hv_actual": cluster.FillHV(inst, skip_globals=True),
        "be_instance": inst.beparams,
        "be_actual": cluster.FillBE(inst),
        "serial_no": inst.serial_no,
        "mtime": inst.mtime,
        "ctime": inst.ctime,
        "uuid": inst.uuid,
        }

    return all_data
8140
8141
8142 class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.
8144
8145   """
8146   HPATH = "instance-modify"
8147   HTYPE = constants.HTYPE_INSTANCE
8148   _OP_REQP = ["instance_name"]
8149   REQ_BGL = False
8150
8151   def CheckArguments(self):
8152     if not hasattr(self.op, 'nics'):
8153       self.op.nics = []
8154     if not hasattr(self.op, 'disks'):
8155       self.op.disks = []
8156     if not hasattr(self.op, 'beparams'):
8157       self.op.beparams = {}
8158     if not hasattr(self.op, 'hvparams'):
8159       self.op.hvparams = {}
8160     if not hasattr(self.op, "disk_template"):
8161       self.op.disk_template = None
8162     if not hasattr(self.op, "remote_node"):
8163       self.op.remote_node = None
8164     if not hasattr(self.op, "os_name"):
8165       self.op.os_name = None
8166     if not hasattr(self.op, "force_variant"):
8167       self.op.force_variant = False
8168     self.op.force = getattr(self.op, "force", False)
8169     if not (self.op.nics or self.op.disks or self.op.disk_template or
8170             self.op.hvparams or self.op.beparams or self.op.os_name):
8171       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8172
8173     if self.op.hvparams:
8174       _CheckGlobalHvParams(self.op.hvparams)
8175
8176     # Disk validation
8177     disk_addremove = 0
8178     for disk_op, disk_dict in self.op.disks:
8179       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8180       if disk_op == constants.DDM_REMOVE:
8181         disk_addremove += 1
8182         continue
8183       elif disk_op == constants.DDM_ADD:
8184         disk_addremove += 1
8185       else:
8186         if not isinstance(disk_op, int):
8187           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8188         if not isinstance(disk_dict, dict):
8189           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8190           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8191
8192       if disk_op == constants.DDM_ADD:
8193         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8194         if mode not in constants.DISK_ACCESS_SET:
8195           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8196                                      errors.ECODE_INVAL)
8197         size = disk_dict.get('size', None)
8198         if size is None:
8199           raise errors.OpPrereqError("Required disk parameter size missing",
8200                                      errors.ECODE_INVAL)
8201         try:
8202           size = int(size)
8203         except (TypeError, ValueError), err:
8204           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8205                                      str(err), errors.ECODE_INVAL)
8206         disk_dict['size'] = size
8207       else:
8208         # modification of disk
8209         if 'size' in disk_dict:
8210           raise errors.OpPrereqError("Disk size change not possible, use"
8211                                      " grow-disk", errors.ECODE_INVAL)
8212
8213     if disk_addremove > 1:
8214       raise errors.OpPrereqError("Only one disk add or remove operation"
8215                                  " supported at a time", errors.ECODE_INVAL)
8216
8217     if self.op.disks and self.op.disk_template is not None:
8218       raise errors.OpPrereqError("Disk template conversion and other disk"
8219                                  " changes not supported at the same time",
8220                                  errors.ECODE_INVAL)
8221
8222     if self.op.disk_template:
8223       _CheckDiskTemplate(self.op.disk_template)
8224       if (self.op.disk_template in constants.DTS_NET_MIRROR and
8225           self.op.remote_node is None):
8226         raise errors.OpPrereqError("Changing the disk template to a mirrored"
8227                                    " one requires specifying a secondary node",
8228                                    errors.ECODE_INVAL)
8229
8230     # NIC validation
8231     nic_addremove = 0
8232     for nic_op, nic_dict in self.op.nics:
8233       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8234       if nic_op == constants.DDM_REMOVE:
8235         nic_addremove += 1
8236         continue
8237       elif nic_op == constants.DDM_ADD:
8238         nic_addremove += 1
8239       else:
8240         if not isinstance(nic_op, int):
8241           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8242         if not isinstance(nic_dict, dict):
8243           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8244           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8245
8246       # nic_dict should be a dict
8247       nic_ip = nic_dict.get('ip', None)
8248       if nic_ip is not None:
8249         if nic_ip.lower() == constants.VALUE_NONE:
8250           nic_dict['ip'] = None
8251         else:
8252           if not utils.IsValidIP(nic_ip):
8253             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8254                                        errors.ECODE_INVAL)
8255
8256       nic_bridge = nic_dict.get('bridge', None)
8257       nic_link = nic_dict.get('link', None)
8258       if nic_bridge and nic_link:
8259         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8260                                    " at the same time", errors.ECODE_INVAL)
8261       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8262         nic_dict['bridge'] = None
8263       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8264         nic_dict['link'] = None
8265
8266       if nic_op == constants.DDM_ADD:
8267         nic_mac = nic_dict.get('mac', None)
8268         if nic_mac is None:
8269           nic_dict['mac'] = constants.VALUE_AUTO
8270
8271       if 'mac' in nic_dict:
8272         nic_mac = nic_dict['mac']
8273         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8274           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8275
8276         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8277           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8278                                      " modifying an existing nic",
8279                                      errors.ECODE_INVAL)
8280
8281     if nic_addremove > 1:
8282       raise errors.OpPrereqError("Only one NIC add or remove operation"
8283                                  " supported at a time", errors.ECODE_INVAL)
8284
8285   def ExpandNames(self):
8286     self._ExpandAndLockInstance()
8287     self.needed_locks[locking.LEVEL_NODE] = []
8288     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8289
8290   def DeclareLocks(self, level):
8291     if level == locking.LEVEL_NODE:
8292       self._LockInstancesNodes()
8293       if self.op.disk_template and self.op.remote_node:
8294         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8295         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8296
8297   def BuildHooksEnv(self):
8298     """Build hooks env.
8299
8300     This runs on the master, primary and secondaries.
8301
8302     """
8303     args = dict()
8304     if constants.BE_MEMORY in self.be_new:
8305       args['memory'] = self.be_new[constants.BE_MEMORY]
8306     if constants.BE_VCPUS in self.be_new:
8307       args['vcpus'] = self.be_new[constants.BE_VCPUS]
8308     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8309     # information at all.
8310     if self.op.nics:
8311       args['nics'] = []
8312       nic_override = dict(self.op.nics)
8313       c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8314       for idx, nic in enumerate(self.instance.nics):
8315         if idx in nic_override:
8316           this_nic_override = nic_override[idx]
8317         else:
8318           this_nic_override = {}
8319         if 'ip' in this_nic_override:
8320           ip = this_nic_override['ip']
8321         else:
8322           ip = nic.ip
8323         if 'mac' in this_nic_override:
8324           mac = this_nic_override['mac']
8325         else:
8326           mac = nic.mac
8327         if idx in self.nic_pnew:
8328           nicparams = self.nic_pnew[idx]
8329         else:
8330           nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8331         mode = nicparams[constants.NIC_MODE]
8332         link = nicparams[constants.NIC_LINK]
8333         args['nics'].append((ip, mac, mode, link))
8334       if constants.DDM_ADD in nic_override:
8335         ip = nic_override[constants.DDM_ADD].get('ip', None)
8336         mac = nic_override[constants.DDM_ADD]['mac']
8337         nicparams = self.nic_pnew[constants.DDM_ADD]
8338         mode = nicparams[constants.NIC_MODE]
8339         link = nicparams[constants.NIC_LINK]
8340         args['nics'].append((ip, mac, mode, link))
8341       elif constants.DDM_REMOVE in nic_override:
8342         del args['nics'][-1]
8343
8344     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8345     if self.op.disk_template:
8346       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8347     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8348     return env, nl, nl
8349
8350   @staticmethod
8351   def _GetUpdatedParams(old_params, update_dict,
8352                         default_values, parameter_types):
8353     """Return the new params dict for the given params.
8354
8355     @type old_params: dict
8356     @param old_params: old parameters
8357     @type update_dict: dict
8358     @param update_dict: dict containing new parameter values,
8359                         or constants.VALUE_DEFAULT to reset the
8360                         parameter to its default value
8361     @type default_values: dict
8362     @param default_values: default values for the filled parameters
8363     @type parameter_types: dict
8364     @param parameter_types: dict mapping target dict keys to types
8365                             in constants.ENFORCEABLE_TYPES
8366     @rtype: (dict, dict)
8367     @return: (new_parameters, filled_parameters)
8368
8369     """
8370     params_copy = copy.deepcopy(old_params)
8371     for key, val in update_dict.iteritems():
8372       if val == constants.VALUE_DEFAULT:
8373         try:
8374           del params_copy[key]
8375         except KeyError:
8376           pass
8377       else:
8378         params_copy[key] = val
8379     utils.ForceDictType(params_copy, parameter_types)
8380     params_filled = objects.FillDict(default_values, params_copy)
8381     return (params_copy, params_filled)
8382
8383   def CheckPrereq(self):
8384     """Check prerequisites.
8385
8386     This only checks the instance list against the existing names.
8387
8388     """
8389     self.force = self.op.force
8390
8391     # checking the new params on the primary/secondary nodes
8392
8393     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8394     cluster = self.cluster = self.cfg.GetClusterInfo()
8395     assert self.instance is not None, \
8396       "Cannot retrieve locked instance %s" % self.op.instance_name
8397     pnode = instance.primary_node
8398     nodelist = list(instance.all_nodes)
8399
8400     if self.op.disk_template:
8401       if instance.disk_template == self.op.disk_template:
8402         raise errors.OpPrereqError("Instance already has disk template %s" %
8403                                    instance.disk_template, errors.ECODE_INVAL)
8404
8405       if (instance.disk_template,
8406           self.op.disk_template) not in self._DISK_CONVERSIONS:
8407         raise errors.OpPrereqError("Unsupported disk template conversion from"
8408                                    " %s to %s" % (instance.disk_template,
8409                                                   self.op.disk_template),
8410                                    errors.ECODE_INVAL)
8411       if self.op.disk_template in constants.DTS_NET_MIRROR:
8412         _CheckNodeOnline(self, self.op.remote_node)
8413         _CheckNodeNotDrained(self, self.op.remote_node)
8414         disks = [{"size": d.size} for d in instance.disks]
8415         required = _ComputeDiskSize(self.op.disk_template, disks)
8416         _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8417         _CheckInstanceDown(self, instance, "cannot change disk template")
8418
8419     # hvparams processing
8420     if self.op.hvparams:
8421       i_hvdict, hv_new = self._GetUpdatedParams(
8422                              instance.hvparams, self.op.hvparams,
8423                              cluster.hvparams[instance.hypervisor],
8424                              constants.HVS_PARAMETER_TYPES)
8425       # local check
8426       hypervisor.GetHypervisor(
8427         instance.hypervisor).CheckParameterSyntax(hv_new)
8428       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8429       self.hv_new = hv_new # the new actual values
8430       self.hv_inst = i_hvdict # the new dict (without defaults)
8431     else:
8432       self.hv_new = self.hv_inst = {}
8433
8434     # beparams processing
8435     if self.op.beparams:
8436       i_bedict, be_new = self._GetUpdatedParams(
8437                              instance.beparams, self.op.beparams,
8438                              cluster.beparams[constants.PP_DEFAULT],
8439                              constants.BES_PARAMETER_TYPES)
8440       self.be_new = be_new # the new actual values
8441       self.be_inst = i_bedict # the new dict (without defaults)
8442     else:
8443       self.be_new = self.be_inst = {}
8444
8445     self.warn = []
8446
8447     if constants.BE_MEMORY in self.op.beparams and not self.force:
8448       mem_check_list = [pnode]
8449       if be_new[constants.BE_AUTO_BALANCE]:
8450         # either we changed auto_balance to yes or it was from before
8451         mem_check_list.extend(instance.secondary_nodes)
8452       instance_info = self.rpc.call_instance_info(pnode, instance.name,
8453                                                   instance.hypervisor)
8454       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8455                                          instance.hypervisor)
8456       pninfo = nodeinfo[pnode]
8457       msg = pninfo.fail_msg
8458       if msg:
8459         # Assume the primary node is unreachable and go ahead
8460         self.warn.append("Can't get info from primary node %s: %s" %
8461                          (pnode,  msg))
8462       elif not isinstance(pninfo.payload.get('memory_free', None), int):
8463         self.warn.append("Node data from primary node %s doesn't contain"
8464                          " free memory information" % pnode)
8465       elif instance_info.fail_msg:
8466         self.warn.append("Can't get instance runtime information: %s" %
8467                         instance_info.fail_msg)
8468       else:
8469         if instance_info.payload:
8470           current_mem = int(instance_info.payload['memory'])
8471         else:
8472           # Assume instance not running
8473           # (there is a slight race condition here, but it's not very probable,
8474           # and we have no other way to check)
8475           current_mem = 0
8476         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8477                     pninfo.payload['memory_free'])
8478         if miss_mem > 0:
8479           raise errors.OpPrereqError("This change will prevent the instance"
8480                                      " from starting, due to %d MB of memory"
8481                                      " missing on its primary node" % miss_mem,
8482                                      errors.ECODE_NORES)
8483
8484       if be_new[constants.BE_AUTO_BALANCE]:
8485         for node, nres in nodeinfo.items():
8486           if node not in instance.secondary_nodes:
8487             continue
8488           msg = nres.fail_msg
8489           if msg:
8490             self.warn.append("Can't get info from secondary node %s: %s" %
8491                              (node, msg))
8492           elif not isinstance(nres.payload.get('memory_free', None), int):
8493             self.warn.append("Secondary node %s didn't return free"
8494                              " memory information" % node)
8495           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8496             self.warn.append("Not enough memory to failover instance to"
8497                              " secondary node %s" % node)
8498
8499     # NIC processing
8500     self.nic_pnew = {}
8501     self.nic_pinst = {}
8502     for nic_op, nic_dict in self.op.nics:
8503       if nic_op == constants.DDM_REMOVE:
8504         if not instance.nics:
8505           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8506                                      errors.ECODE_INVAL)
8507         continue
8508       if nic_op != constants.DDM_ADD:
8509         # an existing nic
8510         if not instance.nics:
8511           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8512                                      " no NICs" % nic_op,
8513                                      errors.ECODE_INVAL)
8514         if nic_op < 0 or nic_op >= len(instance.nics):
8515           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8516                                      " are 0 to %d" %
8517                                      (nic_op, len(instance.nics) - 1),
8518                                      errors.ECODE_INVAL)
8519         old_nic_params = instance.nics[nic_op].nicparams
8520         old_nic_ip = instance.nics[nic_op].ip
8521       else:
8522         old_nic_params = {}
8523         old_nic_ip = None
8524
8525       update_params_dict = dict([(key, nic_dict[key])
8526                                  for key in constants.NICS_PARAMETERS
8527                                  if key in nic_dict])
8528
8529       if 'bridge' in nic_dict:
8530         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8531
8532       new_nic_params, new_filled_nic_params = \
8533           self._GetUpdatedParams(old_nic_params, update_params_dict,
8534                                  cluster.nicparams[constants.PP_DEFAULT],
8535                                  constants.NICS_PARAMETER_TYPES)
8536       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8537       self.nic_pinst[nic_op] = new_nic_params
8538       self.nic_pnew[nic_op] = new_filled_nic_params
8539       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8540
8541       if new_nic_mode == constants.NIC_MODE_BRIDGED:
8542         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8543         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8544         if msg:
8545           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8546           if self.force:
8547             self.warn.append(msg)
8548           else:
8549             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8550       if new_nic_mode == constants.NIC_MODE_ROUTED:
8551         if 'ip' in nic_dict:
8552           nic_ip = nic_dict['ip']
8553         else:
8554           nic_ip = old_nic_ip
8555         if nic_ip is None:
8556           raise errors.OpPrereqError('Cannot set the nic ip to None'
8557                                      ' on a routed nic', errors.ECODE_INVAL)
8558       if 'mac' in nic_dict:
8559         nic_mac = nic_dict['mac']
8560         if nic_mac is None:
8561           raise errors.OpPrereqError('Cannot set the nic mac to None',
8562                                      errors.ECODE_INVAL)
8563         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8564           # otherwise generate the mac
8565           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8566         else:
8567           # or validate/reserve the current one
8568           try:
8569             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8570           except errors.ReservationError:
8571             raise errors.OpPrereqError("MAC address %s already in use"
8572                                        " in cluster" % nic_mac,
8573                                        errors.ECODE_NOTUNIQUE)
8574
8575     # DISK processing
8576     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8577       raise errors.OpPrereqError("Disk operations not supported for"
8578                                  " diskless instances",
8579                                  errors.ECODE_INVAL)
8580     for disk_op, _ in self.op.disks:
8581       if disk_op == constants.DDM_REMOVE:
8582         if len(instance.disks) == 1:
8583           raise errors.OpPrereqError("Cannot remove the last disk of"
8584                                      " an instance", errors.ECODE_INVAL)
8585         _CheckInstanceDown(self, instance, "cannot remove disks")
8586
8587       if (disk_op == constants.DDM_ADD and
8588           len(instance.nics) >= constants.MAX_DISKS):
8589         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8590                                    " add more" % constants.MAX_DISKS,
8591                                    errors.ECODE_STATE)
8592       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8593         # an existing disk
8594         if disk_op < 0 or disk_op >= len(instance.disks):
8595           raise errors.OpPrereqError("Invalid disk index %s, valid values"
8596                                      " are 0 to %d" %
8597                                      (disk_op, len(instance.disks)),
8598                                      errors.ECODE_INVAL)
8599
8600     # OS change
8601     if self.op.os_name and not self.op.force:
8602       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8603                       self.op.force_variant)
8604
8605     return
8606
8607   def _ConvertPlainToDrbd(self, feedback_fn):
8608     """Converts an instance from plain to drbd.
8609
8610     """
8611     feedback_fn("Converting template to drbd")
8612     instance = self.instance
8613     pnode = instance.primary_node
8614     snode = self.op.remote_node
8615
8616     # create a fake disk info for _GenerateDiskTemplate
8617     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8618     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8619                                       instance.name, pnode, [snode],
8620                                       disk_info, None, None, 0)
8621     info = _GetInstanceInfoText(instance)
8622     feedback_fn("Creating aditional volumes...")
8623     # first, create the missing data and meta devices
8624     for disk in new_disks:
8625       # unfortunately this is... not too nice
8626       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8627                             info, True)
8628       for child in disk.children:
8629         _CreateSingleBlockDev(self, snode, instance, child, info, True)
8630     # at this stage, all new LVs have been created, we can rename the
8631     # old ones
8632     feedback_fn("Renaming original volumes...")
8633     rename_list = [(o, n.children[0].logical_id)
8634                    for (o, n) in zip(instance.disks, new_disks)]
8635     result = self.rpc.call_blockdev_rename(pnode, rename_list)
8636     result.Raise("Failed to rename original LVs")
8637
8638     feedback_fn("Initializing DRBD devices...")
8639     # all child devices are in place, we can now create the DRBD devices
8640     for disk in new_disks:
8641       for node in [pnode, snode]:
8642         f_create = node == pnode
8643         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8644
8645     # at this point, the instance has been modified
8646     instance.disk_template = constants.DT_DRBD8
8647     instance.disks = new_disks
8648     self.cfg.Update(instance, feedback_fn)
8649
8650     # disks are created, waiting for sync
8651     disk_abort = not _WaitForSync(self, instance)
8652     if disk_abort:
8653       raise errors.OpExecError("There are some degraded disks for"
8654                                " this instance, please cleanup manually")
8655
8656   def _ConvertDrbdToPlain(self, feedback_fn):
8657     """Converts an instance from drbd to plain.
8658
8659     """
8660     instance = self.instance
8661     assert len(instance.secondary_nodes) == 1
8662     pnode = instance.primary_node
8663     snode = instance.secondary_nodes[0]
8664     feedback_fn("Converting template to plain")
8665
8666     old_disks = instance.disks
8667     new_disks = [d.children[0] for d in old_disks]
8668
8669     # copy over size and mode
8670     for parent, child in zip(old_disks, new_disks):
8671       child.size = parent.size
8672       child.mode = parent.mode
8673
8674     # update instance structure
8675     instance.disks = new_disks
8676     instance.disk_template = constants.DT_PLAIN
8677     self.cfg.Update(instance, feedback_fn)
8678
8679     feedback_fn("Removing volumes on the secondary node...")
8680     for disk in old_disks:
8681       self.cfg.SetDiskID(disk, snode)
8682       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8683       if msg:
8684         self.LogWarning("Could not remove block device %s on node %s,"
8685                         " continuing anyway: %s", disk.iv_name, snode, msg)
8686
8687     feedback_fn("Removing unneeded volumes on the primary node...")
8688     for idx, disk in enumerate(old_disks):
8689       meta = disk.children[1]
8690       self.cfg.SetDiskID(meta, pnode)
8691       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8692       if msg:
8693         self.LogWarning("Could not remove metadata for disk %d on node %s,"
8694                         " continuing anyway: %s", idx, pnode, msg)
8695
8696
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    The changes are applied in this order: disk additions, removals and
    mode changes, disk template conversion, NIC changes, hypervisor
    parameters, backend parameters and finally the OS name. The
    instance object is written back to the configuration at the end.

    @param feedback_fn: function used to send messages (including the
        warnings collected in C{self.warn}) back to the caller
    @rtype: list
    @return: list of (parameter, new value) pairs describing the disk,
        disk template, NIC, hypervisor and backend parameter changes
        that were processed (an OS change adds no entry)
    @raise errors.OpExecError: if the instance disks cannot be shut down
        before a disk template conversion

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        # after the pop the list length equals the index the disk had
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          # reuse the driver and the directory of the first disk
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            # a creation failure is only logged; the new disk stays in
            # the instance's disk list
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        # (disk_op is the index of the disk in this case)
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        # the table holds plain functions, hence the explicit self
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # catch-all on purpose: whatever went wrong, the DRBD minors are
        # released before the error is passed on unchanged
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        # modify an existing NIC; nic_op is its index
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    self.cfg.Update(instance, feedback_fn)

    return result
8822
  # Maps a (current disk template, requested disk template) pair to the
  # function performing that conversion. The values are the plain
  # functions defined above in this class body, so Exec has to pass the
  # LU explicitly as first argument when calling them.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
8827
class LUQueryExports(NoHooksLU):
  """Logical unit listing the exports found on a set of nodes.

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    """Request shared locks on the given nodes, or on all if none given.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      node_locks = _GetWantedNodes(self, self.op.nodes)
    else:
      node_locks = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def CheckPrereq(self):
    """Remember the nodes whose locks were acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Collect the export list of every locked node.

    @rtype: dict
    @return: dictionary mapping each node name to the payload of its
        export list call, or to C{False} if the call to that node
        failed

    """
    exports = {}
    for node, node_result in self.rpc.call_export_list(self.nodes).items():
      if node_result.fail_msg:
        exports[node] = False
      else:
        exports[node] = node_result.payload

    return exports
8868
8869
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Stores the shutdown timeout of the opcode in C{self.shutdown_timeout},
    using C{constants.DEFAULT_SHUTDOWN_TIMEOUT} when the opcode has no
    such attribute.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Lock the instance and all nodes.

    """
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    @rtype: tuple
    @return: the hook environment and the node list, the latter being
        used both as second and as third element

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid, that the
    primary node is online, that the target node is online and not
    drained, and that none of the instance disks is file-based.

    @raise errors.OpPrereqError: if the instance has a file-based disk

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
    assert self.dst_node is not None

    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CreateSnapshots(self, feedback_fn):
    """Creates an LVM snapshot for every disk of the instance.

    @param feedback_fn: function used to report progress to the caller
    @rtype: list
    @return: One entry per instance disk, in disk order: the snapshot as
        an L{objects.Disk} instance, or C{False} if snapshotting that
        disk failed

    """
    instance = self.instance
    src_node = instance.primary_node

    vgname = self.cfg.GetVGName()

    snap_disks = []

    for idx, disk in enumerate(instance.disks):
      feedback_fn("Creating a snapshot of disk/%s on node %s" %
                  (idx, src_node))

      # result.payload will be a snapshot of an lvm leaf of the one we
      # passed
      result = self.rpc.call_blockdev_snapshot(src_node, disk)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                        idx, src_node, msg)
        # keep the list aligned with instance.disks
        snap_disks.append(False)
      else:
        disk_id = (vgname, result.payload)
        new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                               logical_id=disk_id, physical_id=disk_id,
                               iv_name=disk.iv_name)
        snap_disks.append(new_dev)

    return snap_disks

  def _RemoveSnapshot(self, feedback_fn, snap_disks, disk_index):
    """Removes an LVM snapshot.

    @param feedback_fn: function used to report progress to the caller
    @type snap_disks: list
    @param snap_disks: The list of all snapshots as returned by
                       L{_CreateSnapshots}
    @type disk_index: number
    @param disk_index: Index of the snapshot to be removed
    @rtype: bool
    @return: Whether removal was successful or not; C{False} is also
        returned when there is no snapshot at the given index

    """
    disk = snap_disks[disk_index]
    if disk:
      src_node = self.instance.primary_node

      feedback_fn("Removing snapshot of disk/%s on node %s" %
                  (disk_index, src_node))

      result = self.rpc.call_blockdev_remove(src_node, disk)
      if not result.fail_msg:
        return True

      self.LogWarning("Could not remove snapshot for disk/%d from node"
                      " %s: %s", disk_index, src_node, result.fail_msg)

    return False

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    @param feedback_fn: function used to report progress to the caller

    """
    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    @param feedback_fn: function used to report progress to the caller
    @rtype: tuple
    @return: a pair made of a boolean telling whether finalizing the
        export on the target node succeeded, and a list with one boolean
        per instance disk telling whether that disk was exported
    @raise errors.OpExecError: if the instance was shut down for the
        export and cannot be started again

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      # per-disk results
      dresults = []
      removed_snaps = [False] * len(instance.disks)

      snap_disks = None
      try:
        try:
          snap_disks = self._CreateSnapshots(feedback_fn)
        finally:
          # whether or not the snapshots could be taken, an instance
          # that was only shut down for the export is started again
          if self.op.shutdown and instance.admin_up:
            feedback_fn("Starting instance %s" % instance.name)
            result = self.rpc.call_instance_start(src_node, instance,
                                                  None, None)
            msg = result.fail_msg
            if msg:
              _ShutdownInstanceDisks(self, instance)
              raise errors.OpExecError("Could not start instance: %s" % msg)

        assert len(snap_disks) == len(instance.disks)
        assert len(removed_snaps) == len(instance.disks)

        # TODO: check for size

        cluster_name = self.cfg.GetClusterName()
        for idx, dev in enumerate(snap_disks):
          feedback_fn("Exporting snapshot %s from %s to %s" %
                      (idx, src_node, dst_node.name))
          if dev:
            # FIXME: pass debug from opcode to backend
            result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                   instance, cluster_name,
                                                   idx, self.op.debug_level)
            msg = result.fail_msg
            if msg:
              self.LogWarning("Could not export disk/%s from node %s to"
                              " node %s: %s", idx, src_node, dst_node.name, msg)
              dresults.append(False)
            else:
              dresults.append(True)

            # Remove snapshot
            if self._RemoveSnapshot(feedback_fn, snap_disks, idx):
              removed_snaps[idx] = True
          else:
            # no snapshot could be created for this disk
            dresults.append(False)

        assert len(dresults) == len(instance.disks)

        # Check for backwards compatibility
        assert compat.all(isinstance(i, bool) for i in dresults), \
               "Not all results are boolean: %r" % dresults

        feedback_fn("Finalizing export on %s" % dst_node.name)
        result = self.rpc.call_finalize_export(dst_node.name, instance,
                                               snap_disks)
        msg = result.fail_msg
        fin_resu = not msg
        if msg:
          self.LogWarning("Could not finalize export for instance %s"
                          " on node %s: %s", instance.name, dst_node.name, msg)

      finally:
        # Remove all snapshots which have not been removed yet
        assert len(removed_snaps) == len(instance.disks)
        # snap_disks is still None if _CreateSnapshots raised; indexing
        # it in _RemoveSnapshot would raise a TypeError from within this
        # finally clause and hide the error that got us here
        if snap_disks is not None:
          for idx, removed in enumerate(removed_snaps):
            if not removed:
              self._RemoveSnapshot(feedback_fn, snap_disks, idx)

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    self._CleanupExports(feedback_fn)

    return fin_resu, dresults
9136
9137
class LURemoveExport(NoHooksLU):
  """Logical unit deleting the exports of one instance from all nodes.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Request locks on all nodes.

    """
    self.needed_locks = {}
    # Every node has to be locked for the removal to work. The instance
    # itself stays unlocked: nothing happens to it, and exports of an
    # already removed instance can be deleted as well.
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Remove the exports of the instance from every locked node.

    @param feedback_fn: function used to send a hint to the caller when
        no export was found for a name that could not be expanded

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # An instance unknown to the configuration is looked up under the
    # name exactly as it was passed in, which can only match if that
    # name was an FQDN.
    name_unresolved = not instance_name
    if name_unresolved:
      instance_name = self.op.instance_name

    export_found = False
    node_exports = self.rpc.call_export_list(
      self.acquired_locks[locking.LEVEL_NODE])
    for node in node_exports:
      query_err = node_exports[node].fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_err)
        continue
      if instance_name not in node_exports[node].payload:
        continue

      export_found = True
      remove_err = self.rpc.call_export_remove(node, instance_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node, remove_err)

    if name_unresolved and not export_found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
9189
9190
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Common base class of the tag LUs.

  It is abstract: it only expands the name of the tagged object, locks
  it and looks it up, leaving the actual work to its subclasses.

  """

  def ExpandNames(self):
    """Expand the object name and lock the node or instance it names.

    """
    self.needed_locks = {}
    kind = self.op.kind
    if kind == constants.TAG_NODE:
      full_name = _ExpandNodeName(self.cfg, self.op.name)
      self.op.name = full_name
      self.needed_locks[locking.LEVEL_NODE] = full_name
    elif kind == constants.TAG_INSTANCE:
      full_name = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = full_name
      self.needed_locks[locking.LEVEL_INSTANCE] = full_name

  def CheckPrereq(self):
    """Look up the tagged object and store it in C{self.target}.

    @raise errors.OpPrereqError: if the tag kind is not one of cluster,
        node or instance

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)
    self.target = target
9220
9221
class LUGetTags(TagsLU):
  """Logical unit returning the tags set on one object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Return the tags of the target object.

    @rtype: list
    @return: the tags of C{self.target}, as a new list

    """
    tags = self.target.GetTags()
    return list(tags)
9234
9235
9236 class LUSearchTags(NoHooksLU):
9237   """Searches the tags for a given pattern.
9238
9239   """
9240   _OP_REQP = ["pattern"]
9241   REQ_BGL = False
9242
9243   def ExpandNames(self):
9244     self.needed_locks = {}
9245
9246   def CheckPrereq(self):
9247     """Check prerequisites.
9248
9249     This checks the pattern passed for validity by compiling it.
9250
9251     """
9252     try:
9253       self.re = re.compile(self.op.pattern)
9254     except re.error, err:
9255       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9256                                  (self.op.pattern, err), errors.ECODE_INVAL)
9257
9258   def Exec(self, feedback_fn):
9259     """Returns the tag list.
9260
9261     """
9262     cfg = self.cfg
9263     tgts = [("/cluster", cfg.GetClusterInfo())]
9264     ilist = cfg.GetAllInstancesInfo().values()
9265     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9266     nlist = cfg.GetAllNodesInfo().values()
9267     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9268     results = []
9269     for path, target in tgts:
9270       for tag in target.GetTags():
9271         if self.re.search(tag):
9272           results.append((path, tag))
9273     return results
9274
9275
9276 class LUAddTags(TagsLU):
9277   """Sets a tag on a given object.
9278
9279   """
9280   _OP_REQP = ["kind", "name", "tags"]
9281   REQ_BGL = False
9282
9283   def CheckPrereq(self):
9284     """Check prerequisites.
9285
9286     This checks the type and length of the tag name and value.
9287
9288     """
9289     TagsLU.CheckPrereq(self)
9290     for tag in self.op.tags:
9291       objects.TaggableObject.ValidateTag(tag)
9292
9293   def Exec(self, feedback_fn):
9294     """Sets the tag.
9295
9296     """
9297     try:
9298       for tag in self.op.tags:
9299         self.target.AddTag(tag)
9300     except errors.TagError, err:
9301       raise errors.OpExecError("Error while setting tag: %s" % str(err))
9302     self.cfg.Update(self.target, feedback_fn)
9303
9304
class LUDelTags(TagsLU):
  """Logical unit removing a list of tags from one object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Validate the tags and make sure the target carries all of them.

    @raise errors.OpPrereqError: if at least one of the tags is not set
        on the target object

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    wanted = frozenset(self.op.tags)
    present = self.target.GetTags()
    if wanted <= present:
      return

    # report the missing tags quoted and in sorted order
    missing = sorted(["'%s'" % tag for tag in wanted - present])
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (",".join(missing)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the target and save it to the configuration.

    @param feedback_fn: function passed on to the configuration update

    """
    target = self.target
    for tag in self.op.tags:
      target.RemoveTag(tag)
    self.cfg.Update(target, feedback_fn)
9337
9338
class LUTestDelay(NoHooksLU):
  """Logical unit sleeping for a given amount of time.

  The sleep takes place on the master, on a list of nodes, or on both,
  as requested by the opcode.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node list, if any, and lock those nodes.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to
    # use this way in ExpandNames. Check LogicalUnit.ExpandNames docstring
    # for more information.
    wanted = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = wanted
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Sleep on the master and/or on the nodes.

    @raise errors.OpExecError: if the delay on the master fails

    """
    if self.op.on_master and not utils.TestDelay(self.op.duration):
      raise errors.OpExecError("Error during master delay test")

    if self.op.on_nodes:
      node_results = self.rpc.call_test_delay(self.op.on_nodes,
                                              self.op.duration)
      for node, node_result in node_results.items():
        node_result.Raise("Failure during rpc call to node %s" % node)
9379
9380
9381 class IAllocator(object):
9382   """IAllocator framework.
9383
9384   An IAllocator instance has three sets of attributes:
9385     - cfg that is needed to query the cluster
9386     - input data (all members of the _KEYS class attribute are required)
9387     - four buffer attributes (in|out_data|text), that represent the
9388       input (to the external script) in text and data structure format,
9389       and the output from it, again in two formats
9390     - the result variables from the script (success, info, nodes) for
9391       easy usage
9392
9393   """
9394   # pylint: disable-msg=R0902
9395   # lots of instance attributes
  # Keyword arguments __init__ requires (and exclusively accepts) when
  # the mode is constants.IALLOCATOR_MODE_ALLOC
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  # Same, for constants.IALLOCATOR_MODE_RELOC
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  # Same, for constants.IALLOCATOR_MODE_MEVAC
  _EVAC_KEYS = [
    "evac_nodes",
    ]
9406
9407   def __init__(self, cfg, rpc, mode, **kwargs):
9408     self.cfg = cfg
9409     self.rpc = rpc
9410     # init buffer variables
9411     self.in_text = self.out_text = self.in_data = self.out_data = None
9412     # init all input fields so that pylint is happy
9413     self.mode = mode
9414     self.mem_size = self.disks = self.disk_template = None
9415     self.os = self.tags = self.nics = self.vcpus = None
9416     self.hypervisor = None
9417     self.relocate_from = None
9418     self.name = None
9419     self.evac_nodes = None
9420     # computed fields
9421     self.required_nodes = None
9422     # init result fields
9423     self.success = self.info = self.result = None
9424     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9425       keyset = self._ALLO_KEYS
9426       fn = self._AddNewInstance
9427     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9428       keyset = self._RELO_KEYS
9429       fn = self._AddRelocateInstance
9430     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9431       keyset = self._EVAC_KEYS
9432       fn = self._AddEvacuateNodes
9433     else:
9434       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9435                                    " IAllocator" % self.mode)
9436     for key in kwargs:
9437       if key not in keyset:
9438         raise errors.ProgrammerError("Invalid input parameter '%s' to"
9439                                      " IAllocator" % key)
9440       setattr(self, key, kwargs[key])
9441
9442     for key in keyset:
9443       if key not in kwargs:
9444         raise errors.ProgrammerError("Missing input parameter '%s' to"
9445                                      " IAllocator" % key)
9446     self._BuildInputData(fn)
9447
9448   def _ComputeClusterData(self):
9449     """Compute the generic allocator input data.
9450
9451     This is the data that is independent of the actual operation.
9452
9453     """
9454     cfg = self.cfg
9455     cluster_info = cfg.GetClusterInfo()
9456     # cluster data
9457     data = {
9458       "version": constants.IALLOCATOR_VERSION,
9459       "cluster_name": cfg.GetClusterName(),
9460       "cluster_tags": list(cluster_info.GetTags()),
9461       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9462       # we don't have job IDs
9463       }
9464     iinfo = cfg.GetAllInstancesInfo().values()
9465     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9466
9467     # node data
9468     node_results = {}
9469     node_list = cfg.GetNodeList()
9470
9471     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9472       hypervisor_name = self.hypervisor
9473     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9474       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9475     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9476       hypervisor_name = cluster_info.enabled_hypervisors[0]
9477
9478     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9479                                         hypervisor_name)
9480     node_iinfo = \
9481       self.rpc.call_all_instances_info(node_list,
9482                                        cluster_info.enabled_hypervisors)
9483     for nname, nresult in node_data.items():
9484       # first fill in static (config-based) values
9485       ninfo = cfg.GetNodeInfo(nname)
9486       pnr = {
9487         "tags": list(ninfo.GetTags()),
9488         "primary_ip": ninfo.primary_ip,
9489         "secondary_ip": ninfo.secondary_ip,
9490         "offline": ninfo.offline,
9491         "drained": ninfo.drained,
9492         "master_candidate": ninfo.master_candidate,
9493         }
9494
9495       if not (ninfo.offline or ninfo.drained):
9496         nresult.Raise("Can't get data for node %s" % nname)
9497         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9498                                 nname)
9499         remote_info = nresult.payload
9500
9501         for attr in ['memory_total', 'memory_free', 'memory_dom0',
9502                      'vg_size', 'vg_free', 'cpu_total']:
9503           if attr not in remote_info:
9504             raise errors.OpExecError("Node '%s' didn't return attribute"
9505                                      " '%s'" % (nname, attr))
9506           if not isinstance(remote_info[attr], int):
9507             raise errors.OpExecError("Node '%s' returned invalid value"
9508                                      " for '%s': %s" %
9509                                      (nname, attr, remote_info[attr]))
9510         # compute memory used by primary instances
9511         i_p_mem = i_p_up_mem = 0
9512         for iinfo, beinfo in i_list:
9513           if iinfo.primary_node == nname:
9514             i_p_mem += beinfo[constants.BE_MEMORY]
9515             if iinfo.name not in node_iinfo[nname].payload:
9516               i_used_mem = 0
9517             else:
9518               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9519             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9520             remote_info['memory_free'] -= max(0, i_mem_diff)
9521
9522             if iinfo.admin_up:
9523               i_p_up_mem += beinfo[constants.BE_MEMORY]
9524
9525         # compute memory used by instances
9526         pnr_dyn = {
9527           "total_memory": remote_info['memory_total'],
9528           "reserved_memory": remote_info['memory_dom0'],
9529           "free_memory": remote_info['memory_free'],
9530           "total_disk": remote_info['vg_size'],
9531           "free_disk": remote_info['vg_free'],
9532           "total_cpus": remote_info['cpu_total'],
9533           "i_pri_memory": i_p_mem,
9534           "i_pri_up_memory": i_p_up_mem,
9535           }
9536         pnr.update(pnr_dyn)
9537
9538       node_results[nname] = pnr
9539     data["nodes"] = node_results
9540
9541     # instance data
9542     instance_data = {}
9543     for iinfo, beinfo in i_list:
9544       nic_data = []
9545       for nic in iinfo.nics:
9546         filled_params = objects.FillDict(
9547             cluster_info.nicparams[constants.PP_DEFAULT],
9548             nic.nicparams)
9549         nic_dict = {"mac": nic.mac,
9550                     "ip": nic.ip,
9551                     "mode": filled_params[constants.NIC_MODE],
9552                     "link": filled_params[constants.NIC_LINK],
9553                    }
9554         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9555           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9556         nic_data.append(nic_dict)
9557       pir = {
9558         "tags": list(iinfo.GetTags()),
9559         "admin_up": iinfo.admin_up,
9560         "vcpus": beinfo[constants.BE_VCPUS],
9561         "memory": beinfo[constants.BE_MEMORY],
9562         "os": iinfo.os,
9563         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9564         "nics": nic_data,
9565         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9566         "disk_template": iinfo.disk_template,
9567         "hypervisor": iinfo.hypervisor,
9568         }
9569       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9570                                                  pir["disks"])
9571       instance_data[iinfo.name] = pir
9572
9573     data["instances"] = instance_data
9574
9575     self.in_data = data
9576
9577   def _AddNewInstance(self):
9578     """Add new instance data to allocator structure.
9579
9580     This in combination with _AllocatorGetClusterData will create the
9581     correct structure needed as input for the allocator.
9582
9583     The checks for the completeness of the opcode must have already been
9584     done.
9585
9586     """
9587     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9588
9589     if self.disk_template in constants.DTS_NET_MIRROR:
9590       self.required_nodes = 2
9591     else:
9592       self.required_nodes = 1
9593     request = {
9594       "name": self.name,
9595       "disk_template": self.disk_template,
9596       "tags": self.tags,
9597       "os": self.os,
9598       "vcpus": self.vcpus,
9599       "memory": self.mem_size,
9600       "disks": self.disks,
9601       "disk_space_total": disk_space,
9602       "nics": self.nics,
9603       "required_nodes": self.required_nodes,
9604       }
9605     return request
9606
9607   def _AddRelocateInstance(self):
9608     """Add relocate instance data to allocator structure.
9609
9610     This in combination with _IAllocatorGetClusterData will create the
9611     correct structure needed as input for the allocator.
9612
9613     The checks for the completeness of the opcode must have already been
9614     done.
9615
9616     """
9617     instance = self.cfg.GetInstanceInfo(self.name)
9618     if instance is None:
9619       raise errors.ProgrammerError("Unknown instance '%s' passed to"
9620                                    " IAllocator" % self.name)
9621
9622     if instance.disk_template not in constants.DTS_NET_MIRROR:
9623       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9624                                  errors.ECODE_INVAL)
9625
9626     if len(instance.secondary_nodes) != 1:
9627       raise errors.OpPrereqError("Instance has not exactly one secondary node",
9628                                  errors.ECODE_STATE)
9629
9630     self.required_nodes = 1
9631     disk_sizes = [{'size': disk.size} for disk in instance.disks]
9632     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9633
9634     request = {
9635       "name": self.name,
9636       "disk_space_total": disk_space,
9637       "required_nodes": self.required_nodes,
9638       "relocate_from": self.relocate_from,
9639       }
9640     return request
9641
9642   def _AddEvacuateNodes(self):
9643     """Add evacuate nodes data to allocator structure.
9644
9645     """
9646     request = {
9647       "evac_nodes": self.evac_nodes
9648       }
9649     return request
9650
9651   def _BuildInputData(self, fn):
9652     """Build input data structures.
9653
9654     """
9655     self._ComputeClusterData()
9656
9657     request = fn()
9658     request["type"] = self.mode
9659     self.in_data["request"] = request
9660
9661     self.in_text = serializer.Dump(self.in_data)
9662
9663   def Run(self, name, validate=True, call_fn=None):
9664     """Run an instance allocator and return the results.
9665
9666     """
9667     if call_fn is None:
9668       call_fn = self.rpc.call_iallocator_runner
9669
9670     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9671     result.Raise("Failure while running the iallocator script")
9672
9673     self.out_text = result.payload
9674     if validate:
9675       self._ValidateResult()
9676
9677   def _ValidateResult(self):
9678     """Process the allocator results.
9679
9680     This will process and if successful save the result in
9681     self.out_data and the other parameters.
9682
9683     """
9684     try:
9685       rdict = serializer.Load(self.out_text)
9686     except Exception, err:
9687       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9688
9689     if not isinstance(rdict, dict):
9690       raise errors.OpExecError("Can't parse iallocator results: not a dict")
9691
9692     # TODO: remove backwards compatiblity in later versions
9693     if "nodes" in rdict and "result" not in rdict:
9694       rdict["result"] = rdict["nodes"]
9695       del rdict["nodes"]
9696
9697     for key in "success", "info", "result":
9698       if key not in rdict:
9699         raise errors.OpExecError("Can't parse iallocator results:"
9700                                  " missing key '%s'" % key)
9701       setattr(self, key, rdict[key])
9702
9703     if not isinstance(rdict["result"], list):
9704       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9705                                " is not a list")
9706     self.out_data = rdict
9707
9708
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests: depending on the requested direction
  it either returns the input text built for the allocator, or runs the
  named allocator on that input and returns its raw output.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    @raise errors.OpPrereqError: if a parameter needed by the selected
        mode is missing or invalid, if the instance to be allocated
        already exists, or if the mode or the direction is unknown

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # an allocation test must not name an existing instance
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      # each NIC must be a dictionary with the mac, ip and bridge keys
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # each disk must be a dictionary with an integer size and a mode
      # of either 'r' or 'w'
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      # fall back to the hypervisor type from the configuration
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # the instance is relocated away from its current secondary nodes
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # the allocator name is only needed when the allocator is actually run
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: feedback function; not used by this LU
    @return: the allocator input text if the direction is
        C{constants.IALLOCATOR_DIR_IN}, otherwise the unvalidated output
        text of the allocator named by C{self.op.allocator}
    @raise errors.ProgrammerError: if the mode is not a known one

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      # the mode must be interpolated into the message; passing it as a
      # second argument would leave the '%s' placeholder unformatted
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # the output is returned as-is, without validation
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result