code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 import os
  30 import os.path
  31 import time
  32 import re
  33 import platform
  34 import logging
  35 import copy
  36 import OpenSSL
  37
  38 from ganeti import ssh
  39 from ganeti import utils
  40 from ganeti import errors
  41 from ganeti import hypervisor
  42 from ganeti import locking
  43 from ganeti import constants
  44 from ganeti import objects
  45 from ganeti import serializer
  46 from ganeti import ssconf
  47 from ganeti import uidpool
  48
  49
  50 class LogicalUnit(object):
  51   """Logical Unit base class.
  52
  53   Subclasses must follow these rules:
  54     - implement ExpandNames
  55     - implement CheckPrereq (except when tasklets are used)
  56     - implement Exec (except when tasklets are used)
  57     - implement BuildHooksEnv
  58     - redefine HPATH and HTYPE
  59     - optionally redefine their run requirements:
  60         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  61
  62   Note that all commands require root permissions.
  63
  64   @ivar dry_run_result: the value (if any) that will be returned to the caller
  65       in dry-run mode (signalled by opcode dry_run parameter)
  66
  67   """
  68   HPATH = None
  69   HTYPE = None
  70   _OP_REQP = []
  71   REQ_BGL = True
  72
  73   def __init__(self, processor, op, context, rpc):
  74     """Constructor for LogicalUnit.
  75
  76     This needs to be overridden in derived classes in order to check op
  77     validity.
  78
  79     """
  80     self.proc = processor
  81     self.op = op
  82     self.cfg = context.cfg
  83     self.context = context
  84     self.rpc = rpc
  85     # Dicts used to declare locking needs to mcpu
  86     self.needed_locks = None
  87     self.acquired_locks = {}
  88     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
  89     self.add_locks = {}
  90     self.remove_locks = {}
  91     # Used to force good behavior when calling helper functions
  92     self.recalculate_locks = {}
  93     self.__ssh = None
  94     # logging
  95     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
  96     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
  97     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
  98     # support for dry-run
  99     self.dry_run_result = None
 100     # support for generic debug attribute
 101     if (not hasattr(self.op, "debug_level") or
 102         not isinstance(self.op.debug_level, int)):
 103       self.op.debug_level = 0
 104
 105     # Tasklets
 106     self.tasklets = None
 107
 108     for attr_name in self._OP_REQP:
 109       attr_val = getattr(op, attr_name, None)
 110       if attr_val is None:
 111         raise errors.OpPrereqError("Required parameter '%s' missing" %
 112                                    attr_name, errors.ECODE_INVAL)
 113
 114     self.CheckArguments()
 115
 116   def __GetSSH(self):
 117     """Returns the SshRunner object
 118
 119     """
 120     if not self.__ssh:
 121       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 122     return self.__ssh
 123
 124   ssh = property(fget=__GetSSH)
 125
 126   def CheckArguments(self):
 127     """Check syntactic validity for the opcode arguments.
 128
 129     This method is for doing a simple syntactic check and ensure
 130     validity of opcode parameters, without any cluster-related
 131     checks. While the same can be accomplished in ExpandNames and/or
 132     CheckPrereq, doing these separate is better because:
 133
 134       - ExpandNames is left as as purely a lock-related function
 135       - CheckPrereq is run after we have acquired locks (and possible
 136         waited for them)
 137
 138     The function is allowed to change the self.op attribute so that
 139     later methods can no longer worry about missing parameters.
 140
 141     """
 142     pass
 143
 144   def ExpandNames(self):
 145     """Expand names for this LU.
 146
 147     This method is called before starting to execute the opcode, and it should
 148     update all the parameters of the opcode to their canonical form (e.g. a
 149     short node name must be fully expanded after this method has successfully
 150     completed). This way locking, hooks, logging, ecc. can work correctly.
 151
 152     LUs which implement this method must also populate the self.needed_locks
 153     member, as a dict with lock levels as keys, and a list of needed lock names
 154     as values. Rules:
 155
 156       - use an empty dict if you don't need any lock
 157       - if you don't need any lock at a particular level omit that level
 158       - don't put anything for the BGL level
 159       - if you want all locks at a level use locking.ALL_SET as a value
 160
 161     If you need to share locks (rather than acquire them exclusively) at one
 162     level you can modify self.share_locks, setting a true value (usually 1) for
 163     that level. By default locks are not shared.
 164
 165     This function can also define a list of tasklets, which then will be
 166     executed in order instead of the usual LU-level CheckPrereq and Exec
 167     functions, if those are not defined by the LU.
 168
 169     Examples::
 170
 171       # Acquire all nodes and one instance
 172       self.needed_locks = {
 173         locking.LEVEL_NODE: locking.ALL_SET,
 174         locking.LEVEL_INSTANCE: ['instance1.example.tld'],
 175       }
 176       # Acquire just two nodes
 177       self.needed_locks = {
 178         locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
 179       }
 180       # Acquire no locks
 181       self.needed_locks = {} # No, you can't leave it to the default value None
 182
 183     """
 184     # The implementation of this method is mandatory only if the new LU is
 185     # concurrent, so that old LUs don't need to be changed all at the same
 186     # time.
 187     if self.REQ_BGL:
 188       self.needed_locks = {} # Exclusive LUs don't need locks.
 189     else:
 190       raise NotImplementedError
 191
 192   def DeclareLocks(self, level):
 193     """Declare LU locking needs for a level
 194
 195     While most LUs can just declare their locking needs at ExpandNames time,
 196     sometimes there's the need to calculate some locks after having acquired
 197     the ones before. This function is called just before acquiring locks at a
 198     particular level, but after acquiring the ones at lower levels, and permits
 199     such calculations. It can be used to modify self.needed_locks, and by
 200     default it does nothing.
 201
 202     This function is only called if you have something already set in
 203     self.needed_locks for the level.
 204
 205     @param level: Locking level which is going to be locked
 206     @type level: member of ganeti.locking.LEVELS
 207
 208     """
 209
 210   def CheckPrereq(self):
 211     """Check prerequisites for this LU.
 212
 213     This method should check that the prerequisites for the execution
 214     of this LU are fulfilled. It can do internode communication, but
 215     it should be idempotent - no cluster or system changes are
 216     allowed.
 217
 218     The method should raise errors.OpPrereqError in case something is
 219     not fulfilled. Its return value is ignored.
 220
 221     This method should also update all the parameters of the opcode to
 222     their canonical form if it hasn't been done by ExpandNames before.
 223
 224     """
 225     if self.tasklets is not None:
 226       for (idx, tl) in enumerate(self.tasklets):
 227         logging.debug("Checking prerequisites for tasklet %s/%s",
 228                       idx + 1, len(self.tasklets))
 229         tl.CheckPrereq()
 230     else:
 231       raise NotImplementedError
 232
 233   def Exec(self, feedback_fn):
 234     """Execute the LU.
 235
 236     This method should implement the actual work. It should raise
 237     errors.OpExecError for failures that are somewhat dealt with in
 238     code, or expected.
 239
 240     """
 241     if self.tasklets is not None:
 242       for (idx, tl) in enumerate(self.tasklets):
 243         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 244         tl.Exec(feedback_fn)
 245     else:
 246       raise NotImplementedError
 247
 248   def BuildHooksEnv(self):
 249     """Build hooks environment for this LU.
 250
 251     This method should return a three-node tuple consisting of: a dict
 252     containing the environment that will be used for running the
 253     specific hook for this LU, a list of node names on which the hook
 254     should run before the execution, and a list of node names on which
 255     the hook should run after the execution.
 256
 257     The keys of the dict must not have 'GANETI_' prefixed as this will
 258     be handled in the hooks runner. Also note additional keys will be
 259     added by the hooks runner. If the LU doesn't define any
 260     environment, an empty dict (and not None) should be returned.
 261
 262     No nodes should be returned as an empty list (and not None).
 263
 264     Note that if the HPATH for a LU class is None, this function will
 265     not be called.
 266
 267     """
 268     raise NotImplementedError
 269
 270   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 271     """Notify the LU about the results of its hooks.
 272
 273     This method is called every time a hooks phase is executed, and notifies
 274     the Logical Unit about the hooks' result. The LU can then use it to alter
 275     its result based on the hooks.  By default the method does nothing and the
 276     previous result is passed back unchanged but any LU can define it if it
 277     wants to use the local cluster hook-scripts somehow.
 278
 279     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 280         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 281     @param hook_results: the results of the multi-node hooks rpc call
 282     @param feedback_fn: function used send feedback back to the caller
 283     @param lu_result: the previous Exec result this LU had, or None
 284         in the PRE phase
 285     @return: the new Exec result, based on the previous result
 286         and hook results
 287
 288     """
 289     # API must be kept, thus we ignore the unused argument and could
 290     # be a function warnings
 291     # pylint: disable-msg=W0613,R0201
 292     return lu_result
 293
 294   def _ExpandAndLockInstance(self):
 295     """Helper function to expand and lock an instance.
 296
 297     Many LUs that work on an instance take its name in self.op.instance_name
 298     and need to expand it and then declare the expanded name for locking. This
 299     function does it, and then updates self.op.instance_name to the expanded
 300     name. It also initializes needed_locks as a dict, if this hasn't been done
 301     before.
 302
 303     """
 304     if self.needed_locks is None:
 305       self.needed_locks = {}
 306     else:
 307       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 308         "_ExpandAndLockInstance called with instance-level locks set"
 309     self.op.instance_name = _ExpandInstanceName(self.cfg,
 310                                                 self.op.instance_name)
 311     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 312
 313   def _LockInstancesNodes(self, primary_only=False):
 314     """Helper function to declare instances' nodes for locking.
 315
 316     This function should be called after locking one or more instances to lock
 317     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 318     with all primary or secondary nodes for instances already locked and
 319     present in self.needed_locks[locking.LEVEL_INSTANCE].
 320
 321     It should be called from DeclareLocks, and for safety only works if
 322     self.recalculate_locks[locking.LEVEL_NODE] is set.
 323
 324     In the future it may grow parameters to just lock some instance's nodes, or
 325     to just lock primaries or secondary nodes, if needed.
 326
 327     If should be called in DeclareLocks in a way similar to::
 328
 329       if level == locking.LEVEL_NODE:
 330         self._LockInstancesNodes()
 331
 332     @type primary_only: boolean
 333     @param primary_only: only lock primary nodes of locked instances
 334
 335     """
 336     assert locking.LEVEL_NODE in self.recalculate_locks, \
 337       "_LockInstancesNodes helper function called with no nodes to recalculate"
 338
 339     # TODO: check if we're really been called with the instance locks held
 340
 341     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 342     # future we might want to have different behaviors depending on the value
 343     # of self.recalculate_locks[locking.LEVEL_NODE]
 344     wanted_nodes = []
 345     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 346       instance = self.context.cfg.GetInstanceInfo(instance_name)
 347       wanted_nodes.append(instance.primary_node)
 348       if not primary_only:
 349         wanted_nodes.extend(instance.secondary_nodes)
 350
 351     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 352       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 353     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 354       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 355
 356     del self.recalculate_locks[locking.LEVEL_NODE]
 357
 358
 359 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 360   """Simple LU which runs no hooks.
 361
 362   This LU is intended as a parent for other LogicalUnits which will
 363   run no hooks, in order to reduce duplicate code.
 364
 365   """
 366   HPATH = None
 367   HTYPE = None
 368
 369   def BuildHooksEnv(self):
 370     """Empty BuildHooksEnv for NoHooksLu.
 371
 372     This just raises an error.
 373
 374     """
 375     assert False, "BuildHooksEnv called for NoHooksLUs"
 376
 377
 378 class Tasklet:
 379   """Tasklet base class.
 380
 381   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 382   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 383   tasklets know nothing about locks.
 384
 385   Subclasses must follow these rules:
 386     - Implement CheckPrereq
 387     - Implement Exec
 388
 389   """
 390   def __init__(self, lu):
 391     self.lu = lu
 392
 393     # Shortcuts
 394     self.cfg = lu.cfg
 395     self.rpc = lu.rpc
 396
 397   def CheckPrereq(self):
 398     """Check prerequisites for this tasklets.
 399
 400     This method should check whether the prerequisites for the execution of
 401     this tasklet are fulfilled. It can do internode communication, but it
 402     should be idempotent - no cluster or system changes are allowed.
 403
 404     The method should raise errors.OpPrereqError in case something is not
 405     fulfilled. Its return value is ignored.
 406
 407     This method should also update all parameters to their canonical form if it
 408     hasn't been done before.
 409
 410     """
 411     raise NotImplementedError
 412
 413   def Exec(self, feedback_fn):
 414     """Execute the tasklet.
 415
 416     This method should implement the actual work. It should raise
 417     errors.OpExecError for failures that are somewhat dealt with in code, or
 418     expected.
 419
 420     """
 421     raise NotImplementedError
 422
 423
 424 def _GetWantedNodes(lu, nodes):
 425   """Returns list of checked and expanded node names.
 426
 427   @type lu: L{LogicalUnit}
 428   @param lu: the logical unit on whose behalf we execute
 429   @type nodes: list
 430   @param nodes: list of node names or None for all nodes
 431   @rtype: list
 432   @return: the list of nodes, sorted
 433   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 434
 435   """
 436   if not isinstance(nodes, list):
 437     raise errors.OpPrereqError("Invalid argument type 'nodes'",
 438                                errors.ECODE_INVAL)
 439
 440   if not nodes:
 441     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 442       " non-empty list of nodes whose name is to be expanded.")
 443
 444   wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
 445   return utils.NiceSort(wanted)
 446
 447
 448 def _GetWantedInstances(lu, instances):
 449   """Returns list of checked and expanded instance names.
 450
 451   @type lu: L{LogicalUnit}
 452   @param lu: the logical unit on whose behalf we execute
 453   @type instances: list
 454   @param instances: list of instance names or None for all instances
 455   @rtype: list
 456   @return: the list of instances, sorted
 457   @raise errors.OpPrereqError: if the instances parameter is wrong type
 458   @raise errors.OpPrereqError: if any of the passed instances is not found
 459
 460   """
 461   if not isinstance(instances, list):
 462     raise errors.OpPrereqError("Invalid argument type 'instances'",
 463                                errors.ECODE_INVAL)
 464
 465   if instances:
 466     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 467   else:
 468     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 469   return wanted
 470
 471
 472 def _CheckOutputFields(static, dynamic, selected):
 473   """Checks whether all selected fields are valid.
 474
 475   @type static: L{utils.FieldSet}
 476   @param static: static fields set
 477   @type dynamic: L{utils.FieldSet}
 478   @param dynamic: dynamic fields set
 479
 480   """
 481   f = utils.FieldSet()
 482   f.Extend(static)
 483   f.Extend(dynamic)
 484
 485   delta = f.NonMatching(selected)
 486   if delta:
 487     raise errors.OpPrereqError("Unknown output fields selected: %s"
 488                                % ",".join(delta), errors.ECODE_INVAL)
 489
 490
 491 def _CheckBooleanOpField(op, name):
 492   """Validates boolean opcode parameters.
 493
 494   This will ensure that an opcode parameter is either a boolean value,
 495   or None (but that it always exists).
 496
 497   """
 498   val = getattr(op, name, None)
 499   if not (val is None or isinstance(val, bool)):
 500     raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
 501                                (name, str(val)), errors.ECODE_INVAL)
 502   setattr(op, name, val)
 503
 504
 505 def _CheckGlobalHvParams(params):
 506   """Validates that given hypervisor params are not global ones.
 507
 508   This will ensure that instances don't get customised versions of
 509   global params.
 510
 511   """
 512   used_globals = constants.HVC_GLOBALS.intersection(params)
 513   if used_globals:
 514     msg = ("The following hypervisor parameters are global and cannot"
 515            " be customized at instance level, please modify them at"
 516            " cluster level: %s" % utils.CommaJoin(used_globals))
 517     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 518
 519
 520 def _CheckNodeOnline(lu, node):
 521   """Ensure that a given node is online.
 522
 523   @param lu: the LU on behalf of which we make the check
 524   @param node: the node to check
 525   @raise errors.OpPrereqError: if the node is offline
 526
 527   """
 528   if lu.cfg.GetNodeInfo(node).offline:
 529     raise errors.OpPrereqError("Can't use offline node %s" % node,
 530                                errors.ECODE_INVAL)
 531
 532
 533 def _CheckNodeNotDrained(lu, node):
 534   """Ensure that a given node is not drained.
 535
 536   @param lu: the LU on behalf of which we make the check
 537   @param node: the node to check
 538   @raise errors.OpPrereqError: if the node is drained
 539
 540   """
 541   if lu.cfg.GetNodeInfo(node).drained:
 542     raise errors.OpPrereqError("Can't use drained node %s" % node,
 543                                errors.ECODE_INVAL)
 544
 545
 546 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 547   """Ensure that a node supports a given OS.
 548
 549   @param lu: the LU on behalf of which we make the check
 550   @param node: the node to check
 551   @param os_name: the OS to query about
 552   @param force_variant: whether to ignore variant errors
 553   @raise errors.OpPrereqError: if the node is not supporting the OS
 554
 555   """
 556   result = lu.rpc.call_os_get(node, os_name)
 557   result.Raise("OS '%s' not in supported OS list for node %s" %
 558                (os_name, node),
 559                prereq=True, ecode=errors.ECODE_INVAL)
 560   if not force_variant:
 561     _CheckOSVariant(result.payload, os_name)
 562
 563
 564 def _RequireFileStorage():
 565   """Checks that file storage is enabled.
 566
 567   @raise errors.OpPrereqError: when file storage is disabled
 568
 569   """
 570   if not constants.ENABLE_FILE_STORAGE:
 571     raise errors.OpPrereqError("File storage disabled at configure time",
 572                                errors.ECODE_INVAL)
 573
 574
 575 def _CheckDiskTemplate(template):
 576   """Ensure a given disk template is valid.
 577
 578   """
 579   if template not in constants.DISK_TEMPLATES:
 580     msg = ("Invalid disk template name '%s', valid templates are: %s" %
 581            (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
 582     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 583   if template == constants.DT_FILE:
 584     _RequireFileStorage()
 585
 586
 587 def _CheckStorageType(storage_type):
 588   """Ensure a given storage type is valid.
 589
 590   """
 591   if storage_type not in constants.VALID_STORAGE_TYPES:
 592     raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
 593                                errors.ECODE_INVAL)
 594   if storage_type == constants.ST_FILE:
 595     _RequireFileStorage()
 596
 597
 598
 599 def _CheckInstanceDown(lu, instance, reason):
 600   """Ensure that an instance is not running."""
 601   if instance.admin_up:
 602     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 603                                (instance.name, reason), errors.ECODE_STATE)
 604
 605   pnode = instance.primary_node
 606   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 607   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 608               prereq=True, ecode=errors.ECODE_ENVIRON)
 609
 610   if instance.name in ins_l.payload:
 611     raise errors.OpPrereqError("Instance %s is running, %s" %
 612                                (instance.name, reason), errors.ECODE_STATE)
 613
 614
 615 def _ExpandItemName(fn, name, kind):
 616   """Expand an item name.
 617
 618   @param fn: the function to use for expansion
 619   @param name: requested item name
 620   @param kind: text description ('Node' or 'Instance')
 621   @return: the resolved (full) name
 622   @raise errors.OpPrereqError: if the item is not found
 623
 624   """
 625   full_name = fn(name)
 626   if full_name is None:
 627     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 628                                errors.ECODE_NOENT)
 629   return full_name
 630
 631
 632 def _ExpandNodeName(cfg, name):
 633   """Wrapper over L{_ExpandItemName} for nodes."""
 634   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 635
 636
 637 def _ExpandInstanceName(cfg, name):
 638   """Wrapper over L{_ExpandItemName} for instance."""
 639   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 640
 641
 642 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 643                           memory, vcpus, nics, disk_template, disks,
 644                           bep, hvp, hypervisor_name):
 645   """Builds instance related env variables for hooks
 646
 647   This builds the hook environment from individual variables.
 648
 649   @type name: string
 650   @param name: the name of the instance
 651   @type primary_node: string
 652   @param primary_node: the name of the instance's primary node
 653   @type secondary_nodes: list
 654   @param secondary_nodes: list of secondary nodes as strings
 655   @type os_type: string
 656   @param os_type: the name of the instance's OS
 657   @type status: boolean
 658   @param status: the should_run status of the instance
 659   @type memory: string
 660   @param memory: the memory size of the instance
 661   @type vcpus: string
 662   @param vcpus: the count of VCPUs the instance has
 663   @type nics: list
 664   @param nics: list of tuples (ip, mac, mode, link) representing
 665       the NICs the instance has
 666   @type disk_template: string
 667   @param disk_template: the disk template of the instance
 668   @type disks: list
 669   @param disks: the list of (size, mode) pairs
 670   @type bep: dict
 671   @param bep: the backend parameters for the instance
 672   @type hvp: dict
 673   @param hvp: the hypervisor parameters for the instance
 674   @type hypervisor_name: string
 675   @param hypervisor_name: the hypervisor for the instance
 676   @rtype: dict
 677   @return: the hook environment for this instance
 678
 679   """
 680   if status:
 681     str_status = "up"
 682   else:
 683     str_status = "down"
 684   env = {
 685     "OP_TARGET": name,
 686     "INSTANCE_NAME": name,
 687     "INSTANCE_PRIMARY": primary_node,
 688     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 689     "INSTANCE_OS_TYPE": os_type,
 690     "INSTANCE_STATUS": str_status,
 691     "INSTANCE_MEMORY": memory,
 692     "INSTANCE_VCPUS": vcpus,
 693     "INSTANCE_DISK_TEMPLATE": disk_template,
 694     "INSTANCE_HYPERVISOR": hypervisor_name,
 695   }
 696
 697   if nics:
 698     nic_count = len(nics)
 699     for idx, (ip, mac, mode, link) in enumerate(nics):
 700       if ip is None:
 701         ip = ""
 702       env["INSTANCE_NIC%d_IP" % idx] = ip
 703       env["INSTANCE_NIC%d_MAC" % idx] = mac
 704       env["INSTANCE_NIC%d_MODE" % idx] = mode
 705       env["INSTANCE_NIC%d_LINK" % idx] = link
 706       if mode == constants.NIC_MODE_BRIDGED:
 707         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 708   else:
 709     nic_count = 0
 710
 711   env["INSTANCE_NIC_COUNT"] = nic_count
 712
 713   if disks:
 714     disk_count = len(disks)
 715     for idx, (size, mode) in enumerate(disks):
 716       env["INSTANCE_DISK%d_SIZE" % idx] = size
 717       env["INSTANCE_DISK%d_MODE" % idx] = mode
 718   else:
 719     disk_count = 0
 720
 721   env["INSTANCE_DISK_COUNT"] = disk_count
 722
 723   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 724     for key, value in source.items():
 725       env["INSTANCE_%s_%s" % (kind, key)] = value
 726
 727   return env
 728
 729
 730 def _NICListToTuple(lu, nics):
 731   """Build a list of nic information tuples.
 732
 733   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 734   value in LUQueryInstanceData.
 735
 736   @type lu:  L{LogicalUnit}
 737   @param lu: the logical unit on whose behalf we execute
 738   @type nics: list of L{objects.NIC}
 739   @param nics: list of nics to convert to hooks tuples
 740
 741   """
 742   hooks_nics = []
 743   c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
 744   for nic in nics:
 745     ip = nic.ip
 746     mac = nic.mac
 747     filled_params = objects.FillDict(c_nicparams, nic.nicparams)
 748     mode = filled_params[constants.NIC_MODE]
 749     link = filled_params[constants.NIC_LINK]
 750     hooks_nics.append((ip, mac, mode, link))
 751   return hooks_nics
 752
 753
 754 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 755   """Builds instance related env variables for hooks from an object.
 756
 757   @type lu: L{LogicalUnit}
 758   @param lu: the logical unit on whose behalf we execute
 759   @type instance: L{objects.Instance}
 760   @param instance: the instance for which we should build the
 761       environment
 762   @type override: dict
 763   @param override: dictionary with key/values that will override
 764       our values
 765   @rtype: dict
 766   @return: the hook environment dictionary
 767
 768   """
 769   cluster = lu.cfg.GetClusterInfo()
 770   bep = cluster.FillBE(instance)
 771   hvp = cluster.FillHV(instance)
 772   args = {
 773     'name': instance.name,
 774     'primary_node': instance.primary_node,
 775     'secondary_nodes': instance.secondary_nodes,
 776     'os_type': instance.os,
 777     'status': instance.admin_up,
 778     'memory': bep[constants.BE_MEMORY],
 779     'vcpus': bep[constants.BE_VCPUS],
 780     'nics': _NICListToTuple(lu, instance.nics),
 781     'disk_template': instance.disk_template,
 782     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 783     'bep': bep,
 784     'hvp': hvp,
 785     'hypervisor_name': instance.hypervisor,
 786   }
 787   if override:
 788     args.update(override)
 789   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 790
 791
 792 def _AdjustCandidatePool(lu, exceptions):
 793   """Adjust the candidate pool after node operations.
 794
 795   """
 796   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 797   if mod_list:
 798     lu.LogInfo("Promoted nodes to master candidate role: %s",
 799                utils.CommaJoin(node.name for node in mod_list))
 800     for name in mod_list:
 801       lu.context.ReaddNode(name)
 802   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 803   if mc_now > mc_max:
 804     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 805                (mc_now, mc_max))
 806
 807
 808 def _DecideSelfPromotion(lu, exceptions=None):
 809   """Decide whether I should promote myself as a master candidate.
 810
 811   """
 812   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 813   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 814   # the new node will increase mc_max with one, so:
 815   mc_should = min(mc_should + 1, cp_size)
 816   return mc_now < mc_should
 817
 818
 819 def _CheckNicsBridgesExist(lu, target_nics, target_node,
 820                                profile=constants.PP_DEFAULT):
 821   """Check that the brigdes needed by a list of nics exist.
 822
 823   """
 824   c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
 825   paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
 826                 for nic in target_nics]
 827   brlist = [params[constants.NIC_LINK] for params in paramslist
 828             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 829   if brlist:
 830     result = lu.rpc.call_bridges_exist(target_node, brlist)
 831     result.Raise("Error checking bridges on destination node '%s'" %
 832                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 833
 834
 835 def _CheckInstanceBridgesExist(lu, instance, node=None):
 836   """Check that the brigdes needed by an instance exist.
 837
 838   """
 839   if node is None:
 840     node = instance.primary_node
 841   _CheckNicsBridgesExist(lu, instance.nics, node)
 842
 843
 844 def _CheckOSVariant(os_obj, name):
 845   """Check whether an OS name conforms to the os variants specification.
 846
 847   @type os_obj: L{objects.OS}
 848   @param os_obj: OS object to check
 849   @type name: string
 850   @param name: OS name passed by the user, to check for validity
 851
 852   """
 853   if not os_obj.supported_variants:
 854     return
 855   try:
 856     variant = name.split("+", 1)[1]
 857   except IndexError:
 858     raise errors.OpPrereqError("OS name must include a variant",
 859                                errors.ECODE_INVAL)
 860
 861   if variant not in os_obj.supported_variants:
 862     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
 863
 864
 865 def _GetNodeInstancesInner(cfg, fn):
 866   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
 867
 868
 869 def _GetNodeInstances(cfg, node_name):
 870   """Returns a list of all primary and secondary instances on a node.
 871
 872   """
 873
 874   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
 875
 876
 877 def _GetNodePrimaryInstances(cfg, node_name):
 878   """Returns primary instances on a node.
 879
 880   """
 881   return _GetNodeInstancesInner(cfg,
 882                                 lambda inst: node_name == inst.primary_node)
 883
 884
 885 def _GetNodeSecondaryInstances(cfg, node_name):
 886   """Returns secondary instances on a node.
 887
 888   """
 889   return _GetNodeInstancesInner(cfg,
 890                                 lambda inst: node_name in inst.secondary_nodes)
 891
 892
 893 def _GetStorageTypeArgs(cfg, storage_type):
 894   """Returns the arguments for a storage type.
 895
 896   """
 897   # Special case for file storage
 898   if storage_type == constants.ST_FILE:
 899     # storage.FileStorage wants a list of storage directories
 900     return [[cfg.GetFileStorageDir()]]
 901
 902   return []
 903
 904
 905 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
 906   faulty = []
 907
 908   for dev in instance.disks:
 909     cfg.SetDiskID(dev, node_name)
 910
 911   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
 912   result.Raise("Failed to get disk status from node %s" % node_name,
 913                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 914
 915   for idx, bdev_status in enumerate(result.payload):
 916     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
 917       faulty.append(idx)
 918
 919   return faulty
 920
 921
 922 def _FormatTimestamp(secs):
 923   """Formats a Unix timestamp with the local timezone.
 924
 925   """
 926   return time.strftime("%F %T %Z", time.gmtime(secs))
 927
 928
 929 class LUPostInitCluster(LogicalUnit):
 930   """Logical unit for running hooks after cluster initialization.
 931
 932   """
 933   HPATH = "cluster-init"
 934   HTYPE = constants.HTYPE_CLUSTER
 935   _OP_REQP = []
 936
 937   def BuildHooksEnv(self):
 938     """Build hooks env.
 939
 940     """
 941     env = {"OP_TARGET": self.cfg.GetClusterName()}
 942     mn = self.cfg.GetMasterNode()
 943     return env, [], [mn]
 944
 945   def CheckPrereq(self):
 946     """No prerequisites to check.
 947
 948     """
 949     return True
 950
 951   def Exec(self, feedback_fn):
 952     """Nothing to do.
 953
 954     """
 955     return True
 956
 957
 958 class LUDestroyCluster(LogicalUnit):
 959   """Logical unit for destroying the cluster.
 960
 961   """
 962   HPATH = "cluster-destroy"
 963   HTYPE = constants.HTYPE_CLUSTER
 964   _OP_REQP = []
 965
 966   def BuildHooksEnv(self):
 967     """Build hooks env.
 968
 969     """
 970     env = {"OP_TARGET": self.cfg.GetClusterName()}
 971     return env, [], []
 972
 973   def CheckPrereq(self):
 974     """Check prerequisites.
 975
 976     This checks whether the cluster is empty.
 977
 978     Any errors are signaled by raising errors.OpPrereqError.
 979
 980     """
 981     master = self.cfg.GetMasterNode()
 982
 983     nodelist = self.cfg.GetNodeList()
 984     if len(nodelist) != 1 or nodelist[0] != master:
 985       raise errors.OpPrereqError("There are still %d node(s) in"
 986                                  " this cluster." % (len(nodelist) - 1),
 987                                  errors.ECODE_INVAL)
 988     instancelist = self.cfg.GetInstanceList()
 989     if instancelist:
 990       raise errors.OpPrereqError("There are still %d instance(s) in"
 991                                  " this cluster." % len(instancelist),
 992                                  errors.ECODE_INVAL)
 993
 994   def Exec(self, feedback_fn):
 995     """Destroys the cluster.
 996
 997     """
 998     master = self.cfg.GetMasterNode()
 999     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1000
1001     # Run post hooks on master node before it's removed
1002     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1003     try:
1004       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1005     except:
1006       # pylint: disable-msg=W0702
1007       self.LogWarning("Errors occurred running hooks on %s" % master)
1008
1009     result = self.rpc.call_node_stop_master(master, False)
1010     result.Raise("Could not disable the master role")
1011
1012     if modify_ssh_setup:
1013       priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1014       utils.CreateBackup(priv_key)
1015       utils.CreateBackup(pub_key)
1016
1017     return master
1018
1019
def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
  """Verifies certificate details for LUVerifyCluster.

  @param filename: certificate file name, used in the messages only
  @param expired: whether the certificate has expired
  @param not_before: start of the validity period as a timestamp, or None
  @param not_after: end of the validity period as a timestamp, or None
  @param now: the current time as a timestamp
  @param warn_days: a warning is returned if at most this many days of
      validity remain
  @param error_days: an error is returned if at most this many days of
      validity remain
  @rtype: tuple
  @return: (error type, message); both are None if nothing is wrong

  """
  if expired:
    # describe whatever part of the validity period is known
    if not_before is not None and not_after is not None:
      validity = (" (valid from %s to %s)" %
                  (_FormatTimestamp(not_before),
                   _FormatTimestamp(not_after)))
    elif not_before is not None:
      validity = " (valid from %s)" % _FormatTimestamp(not_before)
    elif not_after is not None:
      validity = " (valid until %s)" % _FormatTimestamp(not_after)
    else:
      validity = ""

    return (LUVerifyCluster.ETYPE_ERROR,
            ("Certificate %s is expired" % filename) + validity)

  if not_before is not None and not_before > now:
    return (LUVerifyCluster.ETYPE_WARNING,
            "Certificate %s not yet valid (valid from %s)" %
            (filename, _FormatTimestamp(not_before)))

  if not_after is None:
    # no expiration date available, nothing more to check
    return (None, None)

  remaining_days = int((not_after - now) / (24 * 3600))
  msg = ("Certificate %s expires in %d days" % (filename, remaining_days))

  # the error threshold is tested first
  if remaining_days <= error_days:
    return (LUVerifyCluster.ETYPE_ERROR, msg)

  if remaining_days <= warn_days:
    return (LUVerifyCluster.ETYPE_WARNING, msg)

  return (None, None)
1057
1058
1059 def _VerifyCertificate(filename):
1060   """Verifies a certificate for LUVerifyCluster.
1061
1062   @type filename: string
1063   @param filename: Path to PEM file
1064
1065   """
1066   try:
1067     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1068                                            utils.ReadFile(filename))
1069   except Exception, err: # pylint: disable-msg=W0703
1070     return (LUVerifyCluster.ETYPE_ERROR,
1071             "Failed to load X509 certificate %s: %s" % (filename, err))
1072
1073   # Depending on the pyOpenSSL version, this can just return (None, None)
1074   (not_before, not_after) = utils.GetX509CertValidity(cert)
1075
1076   return _VerifyCertificateInner(filename, cert.has_expired(),
1077                                  not_before, not_after, time.time())
1078
1079
1080 class LUVerifyCluster(LogicalUnit):
1081   """Verifies the cluster status.
1082
1083   """
1084   HPATH = "cluster-verify"
1085   HTYPE = constants.HTYPE_CLUSTER
1086   _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1087   REQ_BGL = False
1088
1089   TCLUSTER = "cluster"
1090   TNODE = "node"
1091   TINSTANCE = "instance"
1092
1093   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1094   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1095   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1096   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1097   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1098   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1099   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1100   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1101   ENODEDRBD = (TNODE, "ENODEDRBD")
1102   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1103   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1104   ENODEHV = (TNODE, "ENODEHV")
1105   ENODELVM = (TNODE, "ENODELVM")
1106   ENODEN1 = (TNODE, "ENODEN1")
1107   ENODENET = (TNODE, "ENODENET")
1108   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1109   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1110   ENODERPC = (TNODE, "ENODERPC")
1111   ENODESSH = (TNODE, "ENODESSH")
1112   ENODEVERSION = (TNODE, "ENODEVERSION")
1113   ENODESETUP = (TNODE, "ENODESETUP")
1114   ENODETIME = (TNODE, "ENODETIME")
1115
1116   ETYPE_FIELD = "code"
1117   ETYPE_ERROR = "ERROR"
1118   ETYPE_WARNING = "WARNING"
1119
1120   class NodeImage(object):
1121     """A class representing the logical and physical status of a node.
1122
1123     @ivar volumes: a structure as returned from
1124         L{ganeti.backend.GetVolumeList} (runtime)
1125     @ivar instances: a list of running instances (runtime)
1126     @ivar pinst: list of configured primary instances (config)
1127     @ivar sinst: list of configured secondary instances (config)
1128     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1129         of this node (config)
1130     @ivar mfree: free memory, as reported by hypervisor (runtime)
1131     @ivar dfree: free disk, as reported by the node (runtime)
1132     @ivar offline: the offline status (config)
1133     @type rpc_fail: boolean
1134     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1135         not whether the individual keys were correct) (runtime)
1136     @type lvm_fail: boolean
1137     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1138     @type hyp_fail: boolean
1139     @ivar hyp_fail: whether the RPC call didn't return the instance list
1140     @type ghost: boolean
1141     @ivar ghost: whether this is a known node or not (config)
1142
1143     """
1144     def __init__(self, offline=False):
1145       self.volumes = {}
1146       self.instances = []
1147       self.pinst = []
1148       self.sinst = []
1149       self.sbp = {}
1150       self.mfree = 0
1151       self.dfree = 0
1152       self.offline = offline
1153       self.rpc_fail = False
1154       self.lvm_fail = False
1155       self.hyp_fail = False
1156       self.ghost = False
1157
1158   def ExpandNames(self):
1159     self.needed_locks = {
1160       locking.LEVEL_NODE: locking.ALL_SET,
1161       locking.LEVEL_INSTANCE: locking.ALL_SET,
1162     }
1163     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1164
1165   def _Error(self, ecode, item, msg, *args, **kwargs):
1166     """Format an error message.
1167
1168     Based on the opcode's error_codes parameter, either format a
1169     parseable error code, or a simpler error string.
1170
1171     This must be called only from Exec and functions called from Exec.
1172
1173     """
1174     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1175     itype, etxt = ecode
1176     # first complete the msg
1177     if args:
1178       msg = msg % args
1179     # then format the whole message
1180     if self.op.error_codes:
1181       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1182     else:
1183       if item:
1184         item = " " + item
1185       else:
1186         item = ""
1187       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1188     # and finally report it via the feedback_fn
1189     self._feedback_fn("  - %s" % msg)
1190
1191   def _ErrorIf(self, cond, *args, **kwargs):
1192     """Log an error message if the passed condition is True.
1193
1194     """
1195     cond = bool(cond) or self.op.debug_simulate_errors
1196     if cond:
1197       self._Error(*args, **kwargs)
1198     # do not mark the operation as failed for WARN cases only
1199     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1200       self.bad = self.bad or cond
1201
1202   def _VerifyNode(self, ninfo, nresult):
1203     """Run multiple tests against a node.
1204
1205     Test list:
1206
1207       - compares ganeti version
1208       - checks vg existence and size > 20G
1209       - checks config file checksum
1210       - checks ssh to other nodes
1211
1212     @type ninfo: L{objects.Node}
1213     @param ninfo: the node to check
1214     @param nresult: the results from the node
1215     @rtype: boolean
1216     @return: whether overall this call was successful (and we can expect
1217          reasonable values in the respose)
1218
1219     """
1220     node = ninfo.name
1221     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1222
1223     # main result, nresult should be a non-empty dict
1224     test = not nresult or not isinstance(nresult, dict)
1225     _ErrorIf(test, self.ENODERPC, node,
1226                   "unable to verify node: no data returned")
1227     if test:
1228       return False
1229
1230     # compares ganeti version
1231     local_version = constants.PROTOCOL_VERSION
1232     remote_version = nresult.get("version", None)
1233     test = not (remote_version and
1234                 isinstance(remote_version, (list, tuple)) and
1235                 len(remote_version) == 2)
1236     _ErrorIf(test, self.ENODERPC, node,
1237              "connection to node returned invalid data")
1238     if test:
1239       return False
1240
1241     test = local_version != remote_version[0]
1242     _ErrorIf(test, self.ENODEVERSION, node,
1243              "incompatible protocol versions: master %s,"
1244              " node %s", local_version, remote_version[0])
1245     if test:
1246       return False
1247
1248     # node seems compatible, we can actually try to look into its results
1249
1250     # full package version
1251     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1252                   self.ENODEVERSION, node,
1253                   "software version mismatch: master %s, node %s",
1254                   constants.RELEASE_VERSION, remote_version[1],
1255                   code=self.ETYPE_WARNING)
1256
1257     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1258     if isinstance(hyp_result, dict):
1259       for hv_name, hv_result in hyp_result.iteritems():
1260         test = hv_result is not None
1261         _ErrorIf(test, self.ENODEHV, node,
1262                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1263
1264
1265     test = nresult.get(constants.NV_NODESETUP,
1266                            ["Missing NODESETUP results"])
1267     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1268              "; ".join(test))
1269
1270     return True
1271
1272   def _VerifyNodeTime(self, ninfo, nresult,
1273                       nvinfo_starttime, nvinfo_endtime):
1274     """Check the node time.
1275
1276     @type ninfo: L{objects.Node}
1277     @param ninfo: the node to check
1278     @param nresult: the remote results for the node
1279     @param nvinfo_starttime: the start time of the RPC call
1280     @param nvinfo_endtime: the end time of the RPC call
1281
1282     """
1283     node = ninfo.name
1284     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1285
1286     ntime = nresult.get(constants.NV_TIME, None)
1287     try:
1288       ntime_merged = utils.MergeTime(ntime)
1289     except (ValueError, TypeError):
1290       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1291       return
1292
1293     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1294       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1295     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1296       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1297     else:
1298       ntime_diff = None
1299
1300     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1301              "Node time diverges by at least %s from master node time",
1302              ntime_diff)
1303
1304   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1305     """Check the node time.
1306
1307     @type ninfo: L{objects.Node}
1308     @param ninfo: the node to check
1309     @param nresult: the remote results for the node
1310     @param vg_name: the configured VG name
1311
1312     """
1313     if vg_name is None:
1314       return
1315
1316     node = ninfo.name
1317     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1318
1319     # checks vg existence and size > 20G
1320     vglist = nresult.get(constants.NV_VGLIST, None)
1321     test = not vglist
1322     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1323     if not test:
1324       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1325                                             constants.MIN_VG_SIZE)
1326       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1327
1328     # check pv names
1329     pvlist = nresult.get(constants.NV_PVLIST, None)
1330     test = pvlist is None
1331     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1332     if not test:
1333       # check that ':' is not present in PV names, since it's a
1334       # special character for lvcreate (denotes the range of PEs to
1335       # use on the PV)
1336       for _, pvname, owner_vg in pvlist:
1337         test = ":" in pvname
1338         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1339                  " '%s' of VG '%s'", pvname, owner_vg)
1340
1341   def _VerifyNodeNetwork(self, ninfo, nresult):
1342     """Check the node time.
1343
1344     @type ninfo: L{objects.Node}
1345     @param ninfo: the node to check
1346     @param nresult: the remote results for the node
1347
1348     """
1349     node = ninfo.name
1350     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1351
1352     test = constants.NV_NODELIST not in nresult
1353     _ErrorIf(test, self.ENODESSH, node,
1354              "node hasn't returned node ssh connectivity data")
1355     if not test:
1356       if nresult[constants.NV_NODELIST]:
1357         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1358           _ErrorIf(True, self.ENODESSH, node,
1359                    "ssh communication with node '%s': %s", a_node, a_msg)
1360
1361     test = constants.NV_NODENETTEST not in nresult
1362     _ErrorIf(test, self.ENODENET, node,
1363              "node hasn't returned node tcp connectivity data")
1364     if not test:
1365       if nresult[constants.NV_NODENETTEST]:
1366         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1367         for anode in nlist:
1368           _ErrorIf(True, self.ENODENET, node,
1369                    "tcp communication with node '%s': %s",
1370                    anode, nresult[constants.NV_NODENETTEST][anode])
1371
1372   def _VerifyInstance(self, instance, instanceconfig, node_image):
1373     """Verify an instance.
1374
1375     This function checks to see if the required block devices are
1376     available on the instance's node.
1377
1378     """
1379     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1380     node_current = instanceconfig.primary_node
1381
1382     node_vol_should = {}
1383     instanceconfig.MapLVsByNode(node_vol_should)
1384
1385     for node in node_vol_should:
1386       n_img = node_image[node]
1387       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1388         # ignore missing volumes on offline or broken nodes
1389         continue
1390       for volume in node_vol_should[node]:
1391         test = volume not in n_img.volumes
1392         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1393                  "volume %s missing on node %s", volume, node)
1394
1395     if instanceconfig.admin_up:
1396       pri_img = node_image[node_current]
1397       test = instance not in pri_img.instances and not pri_img.offline
1398       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1399                "instance not running on its primary node %s",
1400                node_current)
1401
1402     for node, n_img in node_image.items():
1403       if (not node == node_current):
1404         test = instance in n_img.instances
1405         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1406                  "instance should not run on node %s", node)
1407
1408   def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1409     """Verify if there are any unknown volumes in the cluster.
1410
1411     The .os, .swap and backup volumes are ignored. All other volumes are
1412     reported as unknown.
1413
1414     """
1415     for node, n_img in node_image.items():
1416       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1417         # skip non-healthy nodes
1418         continue
1419       for volume in n_img.volumes:
1420         test = (node not in node_vol_should or
1421                 volume not in node_vol_should[node])
1422         self._ErrorIf(test, self.ENODEORPHANLV, node,
1423                       "volume %s is unknown", volume)
1424
1425   def _VerifyOrphanInstances(self, instancelist, node_image):
1426     """Verify the list of running instances.
1427
1428     This checks what instances are running but unknown to the cluster.
1429
1430     """
1431     for node, n_img in node_image.items():
1432       for o_inst in n_img.instances:
1433         test = o_inst not in instancelist
1434         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1435                       "instance %s on node %s should not exist", o_inst, node)
1436
1437   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1438     """Verify N+1 Memory Resilience.
1439
1440     Check that if one single node dies we can still start all the
1441     instances it was primary for.
1442
1443     """
1444     for node, n_img in node_image.items():
1445       # This code checks that every node which is now listed as
1446       # secondary has enough memory to host all instances it is
1447       # supposed to should a single other node in the cluster fail.
1448       # FIXME: not ready for failover to an arbitrary node
1449       # FIXME: does not support file-backed instances
1450       # WARNING: we currently take into account down instances as well
1451       # as up ones, considering that even if they're down someone
1452       # might want to start them even in the event of a node failure.
1453       for prinode, instances in n_img.sbp.items():
1454         needed_mem = 0
1455         for instance in instances:
1456           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1457           if bep[constants.BE_AUTO_BALANCE]:
1458             needed_mem += bep[constants.BE_MEMORY]
1459         test = n_img.mfree < needed_mem
1460         self._ErrorIf(test, self.ENODEN1, node,
1461                       "not enough memory on to accommodate"
1462                       " failovers should peer node %s fail", prinode)
1463
1464   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1465                        master_files):
1466     """Verifies and computes the node required file checksums.
1467
1468     @type ninfo: L{objects.Node}
1469     @param ninfo: the node to check
1470     @param nresult: the remote results for the node
1471     @param file_list: required list of files
1472     @param local_cksum: dictionary of local files and their checksums
1473     @param master_files: list of files that only masters should have
1474
1475     """
1476     node = ninfo.name
1477     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1478
1479     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1480     test = not isinstance(remote_cksum, dict)
1481     _ErrorIf(test, self.ENODEFILECHECK, node,
1482              "node hasn't returned file checksum data")
1483     if test:
1484       return
1485
1486     for file_name in file_list:
1487       node_is_mc = ninfo.master_candidate
1488       must_have = (file_name not in master_files) or node_is_mc
1489       # missing
1490       test1 = file_name not in remote_cksum
1491       # invalid checksum
1492       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1493       # existing and good
1494       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1495       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1496                "file '%s' missing", file_name)
1497       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1498                "file '%s' has wrong checksum", file_name)
1499       # not candidate and this is not a must-have file
1500       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1501                "file '%s' should not exist on non master"
1502                " candidates (and the file is outdated)", file_name)
1503       # all good, except non-master/non-must have combination
1504       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1505                "file '%s' should not exist"
1506                " on non master candidates", file_name)
1507
1508   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1509     """Verifies and the node DRBD status.
1510
1511     @type ninfo: L{objects.Node}
1512     @param ninfo: the node to check
1513     @param nresult: the remote results for the node
1514     @param instanceinfo: the dict of instances
1515     @param drbd_map: the DRBD map as returned by
1516         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1517
1518     """
1519     node = ninfo.name
1520     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1521
1522     # compute the DRBD minors
1523     node_drbd = {}
1524     for minor, instance in drbd_map[node].items():
1525       test = instance not in instanceinfo
1526       _ErrorIf(test, self.ECLUSTERCFG, None,
1527                "ghost instance '%s' in temporary DRBD map", instance)
1528         # ghost instance should not be running, but otherwise we
1529         # don't give double warnings (both ghost instance and
1530         # unallocated minor in use)
1531       if test:
1532         node_drbd[minor] = (instance, False)
1533       else:
1534         instance = instanceinfo[instance]
1535         node_drbd[minor] = (instance.name, instance.admin_up)
1536
1537     # and now check them
1538     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1539     test = not isinstance(used_minors, (tuple, list))
1540     _ErrorIf(test, self.ENODEDRBD, node,
1541              "cannot parse drbd status file: %s", str(used_minors))
1542     if test:
1543       # we cannot check drbd status
1544       return
1545
1546     for minor, (iname, must_exist) in node_drbd.items():
1547       test = minor not in used_minors and must_exist
1548       _ErrorIf(test, self.ENODEDRBD, node,
1549                "drbd minor %d of instance %s is not active", minor, iname)
1550     for minor in used_minors:
1551       test = minor not in node_drbd
1552       _ErrorIf(test, self.ENODEDRBD, node,
1553                "unallocated drbd minor %d is in use", minor)
1554
1555   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1556     """Verifies and updates the node volume data.
1557
1558     This function will update a L{NodeImage}'s internal structures
1559     with data from the remote call.
1560
1561     @type ninfo: L{objects.Node}
1562     @param ninfo: the node to check
1563     @param nresult: the remote results for the node
1564     @param nimg: the node image object
1565     @param vg_name: the configured VG name
1566
1567     """
1568     node = ninfo.name
1569     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1570
1571     nimg.lvm_fail = True
1572     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1573     if vg_name is None:
1574       pass
1575     elif isinstance(lvdata, basestring):
1576       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1577                utils.SafeEncode(lvdata))
1578     elif not isinstance(lvdata, dict):
1579       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1580     else:
1581       nimg.volumes = lvdata
1582       nimg.lvm_fail = False
1583
1584   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1585     """Verifies and updates the node instance list.
1586
1587     If the listing was successful, then updates this node's instance
1588     list. Otherwise, it marks the RPC call as failed for the instance
1589     list key.
1590
1591     @type ninfo: L{objects.Node}
1592     @param ninfo: the node to check
1593     @param nresult: the remote results for the node
1594     @param nimg: the node image object
1595
1596     """
1597     idata = nresult.get(constants.NV_INSTANCELIST, None)
1598     test = not isinstance(idata, list)
1599     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1600                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1601     if test:
1602       nimg.hyp_fail = True
1603     else:
1604       nimg.instances = idata
1605
1606   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1607     """Verifies and computes a node information map
1608
1609     @type ninfo: L{objects.Node}
1610     @param ninfo: the node to check
1611     @param nresult: the remote results for the node
1612     @param nimg: the node image object
1613     @param vg_name: the configured VG name
1614
1615     """
1616     node = ninfo.name
1617     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1618
1619     # try to read free memory (from the hypervisor)
1620     hv_info = nresult.get(constants.NV_HVINFO, None)
1621     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1622     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1623     if not test:
1624       try:
1625         nimg.mfree = int(hv_info["memory_free"])
1626       except (ValueError, TypeError):
1627         _ErrorIf(True, self.ENODERPC, node,
1628                  "node returned invalid nodeinfo, check hypervisor")
1629
1630     # FIXME: devise a free space model for file based instances as well
1631     if vg_name is not None:
1632       test = (constants.NV_VGLIST not in nresult or
1633               vg_name not in nresult[constants.NV_VGLIST])
1634       _ErrorIf(test, self.ENODELVM, node,
1635                "node didn't return data for the volume group '%s'"
1636                " - it is either missing or broken", vg_name)
1637       if not test:
1638         try:
1639           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1640         except (ValueError, TypeError):
1641           _ErrorIf(True, self.ENODERPC, node,
1642                    "node returned invalid LVM info, check LVM status")
1643
1644   def CheckPrereq(self):
1645     """Check prerequisites.
1646
1647     Transform the list of checks we're going to skip into a set and check that
1648     all its members are valid.
1649
1650     """
1651     self.skip_set = frozenset(self.op.skip_checks)
1652     if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1653       raise errors.OpPrereqError("Invalid checks to be skipped specified",
1654                                  errors.ECODE_INVAL)
1655
1656   def BuildHooksEnv(self):
1657     """Build hooks env.
1658
1659     Cluster-Verify hooks just ran in the post phase and their failure makes
1660     the output be logged in the verify output and the verification to fail.
1661
1662     """
1663     all_nodes = self.cfg.GetNodeList()
1664     env = {
1665       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1666       }
1667     for node in self.cfg.GetAllNodesInfo().values():
1668       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1669
1670     return env, [], all_nodes
1671
1672   def Exec(self, feedback_fn):
1673     """Verify integrity of cluster, performing various test on nodes.
1674
1675     """
1676     self.bad = False
1677     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1678     verbose = self.op.verbose
1679     self._feedback_fn = feedback_fn
1680     feedback_fn("* Verifying global settings")
1681     for msg in self.cfg.VerifyConfig():
1682       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1683
1684     # Check the cluster certificates
1685     for cert_filename in constants.ALL_CERT_FILES:
1686       (errcode, msg) = _VerifyCertificate(cert_filename)
1687       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1688
1689     vg_name = self.cfg.GetVGName()
1690     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1691     nodelist = utils.NiceSort(self.cfg.GetNodeList())
1692     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1693     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1694     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1695                         for iname in instancelist)
1696     i_non_redundant = [] # Non redundant instances
1697     i_non_a_balanced = [] # Non auto-balanced instances
1698     n_offline = 0 # Count of offline nodes
1699     n_drained = 0 # Count of nodes being drained
1700     node_vol_should = {}
1701
1702     # FIXME: verify OS list
1703     # do local checksums
1704     master_files = [constants.CLUSTER_CONF_FILE]
1705
1706     file_names = ssconf.SimpleStore().GetFileList()
1707     file_names.extend(constants.ALL_CERT_FILES)
1708     file_names.extend(master_files)
1709
1710     local_checksums = utils.FingerprintFiles(file_names)
1711
1712     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1713     node_verify_param = {
1714       constants.NV_FILELIST: file_names,
1715       constants.NV_NODELIST: [node.name for node in nodeinfo
1716                               if not node.offline],
1717       constants.NV_HYPERVISOR: hypervisors,
1718       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1719                                   node.secondary_ip) for node in nodeinfo
1720                                  if not node.offline],
1721       constants.NV_INSTANCELIST: hypervisors,
1722       constants.NV_VERSION: None,
1723       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1724       constants.NV_NODESETUP: None,
1725       constants.NV_TIME: None,
1726       }
1727
1728     if vg_name is not None:
1729       node_verify_param[constants.NV_VGLIST] = None
1730       node_verify_param[constants.NV_LVLIST] = vg_name
1731       node_verify_param[constants.NV_PVLIST] = [vg_name]
1732       node_verify_param[constants.NV_DRBDLIST] = None
1733
1734     # Build our expected cluster state
1735     node_image = dict((node.name, self.NodeImage(offline=node.offline))
1736                       for node in nodeinfo)
1737
1738     for instance in instancelist:
1739       inst_config = instanceinfo[instance]
1740
1741       for nname in inst_config.all_nodes:
1742         if nname not in node_image:
1743           # ghost node
1744           gnode = self.NodeImage()
1745           gnode.ghost = True
1746           node_image[nname] = gnode
1747
1748       inst_config.MapLVsByNode(node_vol_should)
1749
1750       pnode = inst_config.primary_node
1751       node_image[pnode].pinst.append(instance)
1752
1753       for snode in inst_config.secondary_nodes:
1754         nimg = node_image[snode]
1755         nimg.sinst.append(instance)
1756         if pnode not in nimg.sbp:
1757           nimg.sbp[pnode] = []
1758         nimg.sbp[pnode].append(instance)
1759
1760     # At this point, we have the in-memory data structures complete,
1761     # except for the runtime information, which we'll gather next
1762
1763     # Due to the way our RPC system works, exact response times cannot be
1764     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1765     # time before and after executing the request, we can at least have a time
1766     # window.
1767     nvinfo_starttime = time.time()
1768     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1769                                            self.cfg.GetClusterName())
1770     nvinfo_endtime = time.time()
1771
1772     cluster = self.cfg.GetClusterInfo()
1773     master_node = self.cfg.GetMasterNode()
1774     all_drbd_map = self.cfg.ComputeDRBDMap()
1775
1776     feedback_fn("* Verifying node status")
1777     for node_i in nodeinfo:
1778       node = node_i.name
1779       nimg = node_image[node]
1780
1781       if node_i.offline:
1782         if verbose:
1783           feedback_fn("* Skipping offline node %s" % (node,))
1784         n_offline += 1
1785         continue
1786
1787       if node == master_node:
1788         ntype = "master"
1789       elif node_i.master_candidate:
1790         ntype = "master candidate"
1791       elif node_i.drained:
1792         ntype = "drained"
1793         n_drained += 1
1794       else:
1795         ntype = "regular"
1796       if verbose:
1797         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1798
1799       msg = all_nvinfo[node].fail_msg
1800       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1801       if msg:
1802         nimg.rpc_fail = True
1803         continue
1804
1805       nresult = all_nvinfo[node].payload
1806
1807       nimg.call_ok = self._VerifyNode(node_i, nresult)
1808       self._VerifyNodeNetwork(node_i, nresult)
1809       self._VerifyNodeLVM(node_i, nresult, vg_name)
1810       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1811                             master_files)
1812       self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1813       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1814
1815       self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1816       self._UpdateNodeInstances(node_i, nresult, nimg)
1817       self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1818
1819     feedback_fn("* Verifying instance status")
1820     for instance in instancelist:
1821       if verbose:
1822         feedback_fn("* Verifying instance %s" % instance)
1823       inst_config = instanceinfo[instance]
1824       self._VerifyInstance(instance, inst_config, node_image)
1825       inst_nodes_offline = []
1826
1827       pnode = inst_config.primary_node
1828       pnode_img = node_image[pnode]
1829       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1830                self.ENODERPC, pnode, "instance %s, connection to"
1831                " primary node failed", instance)
1832
1833       if pnode_img.offline:
1834         inst_nodes_offline.append(pnode)
1835
1836       # If the instance is non-redundant we cannot survive losing its primary
1837       # node, so we are not N+1 compliant. On the other hand we have no disk
1838       # templates with more than one secondary so that situation is not well
1839       # supported either.
1840       # FIXME: does not support file-backed instances
1841       if not inst_config.secondary_nodes:
1842         i_non_redundant.append(instance)
1843       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1844                instance, "instance has multiple secondary nodes: %s",
1845                utils.CommaJoin(inst_config.secondary_nodes),
1846                code=self.ETYPE_WARNING)
1847
1848       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1849         i_non_a_balanced.append(instance)
1850
1851       for snode in inst_config.secondary_nodes:
1852         s_img = node_image[snode]
1853         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1854                  "instance %s, connection to secondary node failed", instance)
1855
1856         if s_img.offline:
1857           inst_nodes_offline.append(snode)
1858
1859       # warn that the instance lives on offline nodes
1860       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1861                "instance lives on offline node(s) %s",
1862                utils.CommaJoin(inst_nodes_offline))
1863       # ... or ghost nodes
1864       for node in inst_config.all_nodes:
1865         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1866                  "instance lives on ghost node %s", node)
1867
1868     feedback_fn("* Verifying orphan volumes")
1869     self._VerifyOrphanVolumes(node_vol_should, node_image)
1870
1871     feedback_fn("* Verifying oprhan instances")
1872     self._VerifyOrphanInstances(instancelist, node_image)
1873
1874     if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1875       feedback_fn("* Verifying N+1 Memory redundancy")
1876       self._VerifyNPlusOneMemory(node_image, instanceinfo)
1877
1878     feedback_fn("* Other Notes")
1879     if i_non_redundant:
1880       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1881                   % len(i_non_redundant))
1882
1883     if i_non_a_balanced:
1884       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1885                   % len(i_non_a_balanced))
1886
1887     if n_offline:
1888       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
1889
1890     if n_drained:
1891       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
1892
1893     return not self.bad
1894
1895   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1896     """Analyze the post-hooks' result
1897
1898     This method analyses the hook result, handles it, and sends some
1899     nicely-formatted feedback back to the user.
1900
1901     @param phase: one of L{constants.HOOKS_PHASE_POST} or
1902         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1903     @param hooks_results: the results of the multi-node hooks rpc call
1904     @param feedback_fn: function used send feedback back to the caller
1905     @param lu_result: previous Exec result
1906     @return: the new Exec result, based on the previous result
1907         and hook results
1908
1909     """
1910     # We only really run POST phase hooks, and are only interested in
1911     # their results
1912     if phase == constants.HOOKS_PHASE_POST:
1913       # Used to change hooks' output to proper indentation
1914       indent_re = re.compile('^', re.M)
1915       feedback_fn("* Hooks Results")
1916       assert hooks_results, "invalid result from hooks"
1917
1918       for node_name in hooks_results:
1919         res = hooks_results[node_name]
1920         msg = res.fail_msg
1921         test = msg and not res.offline
1922         self._ErrorIf(test, self.ENODEHOOKS, node_name,
1923                       "Communication failure in hooks execution: %s", msg)
1924         if res.offline or msg:
1925           # No need to investigate payload if node is offline or gave an error.
1926           # override manually lu_result here as _ErrorIf only
1927           # overrides self.bad
1928           lu_result = 1
1929           continue
1930         for script, hkr, output in res.payload:
1931           test = hkr == constants.HKR_FAIL
1932           self._ErrorIf(test, self.ENODEHOOKS, node_name,
1933                         "Script %s failed, output:", script)
1934           if test:
1935             output = indent_re.sub('      ', output)
1936             feedback_fn("%s" % output)
1937             lu_result = 0
1938
1939       return lu_result
1940
1941
class LUVerifyDisks(NoHooksLU):
  """Check the status of the logical volumes backing the instances.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node and instance locks, in shared mode.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict((level, 1) for level in locking.LEVELS)

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances which are marked up and use a network-mirrored disk
    template are taken into account.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = []
    res_missing = {}
    result = (res_nodes, res_instances, res_missing)

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(iname)
                 for iname in self.cfg.GetInstanceList()]

    # map every expected (node, volume) pair to the instance owning it
    nv_dict = {}
    for inst in instances:
      if inst.admin_up and inst.disk_template in constants.DTS_NET_MIRROR:
        lvs_by_node = {}
        inst.MapLVsByNode(lvs_by_node)
        for node, vol_list in lvs_by_node.iteritems():
          for vol in vol_list:
            nv_dict[(node, vol)] = inst

    if not nv_dict:
      # no volumes are expected anywhere, so there is nothing to query
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      for lv_name, (_, _, lv_online) in node_res.payload.items():
        # every volume found on the node is dropped from the expected set
        owner = nv_dict.pop((node, lv_name), None)
        if lv_online or owner is None:
          continue
        if owner.name not in res_instances:
          res_instances.append(owner.name)

    # whatever is still in nv_dict was not reported by any node: group
    # these missing volumes by instance name
    for key, inst in nv_dict.iteritems():
      res_missing.setdefault(inst.name, []).append(key)

    return result
2023
2024
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  The sizes recorded in the configuration are compared with the sizes
  reported by the primary node of each instance, and the configuration
  is corrected where they differ.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance names and declare the needed (shared) locks.

    With an explicit instance list only those instances are locked and
    the node locks are computed later in L{DeclareLocks}; with an empty
    list all instances and all nodes are locked.

    @raise errors.OpPrereqError: if C{instances} is not a list

    """
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    """Lock the primary nodes of the explicitly requested instances.

    """
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      # no explicit list was given: work on every instance we hold a lock on
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: True if the size of any child was changed, False otherwise

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @param feedback_fn: function used to send feedback back to the caller
    @rtype: list
    @return: list of (instance name, disk index, new size) tuples, one
        for each correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    # group the disks by primary node, so that each node is queried once
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      # work on copies, as SetDiskID modifies the disk objects
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      # the sizes are in the payload of the RPC result, as for every other
      # RPC call in this module; one entry per requested disk is expected
      sizes = result.payload
      if len(sizes) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, sizes):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # presumably the node reports bytes while the configuration keeps
        # mebibytes, hence the division by 2**20 - TODO confirm
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
2143
2144
class LURenameCluster(LogicalUnit):
  """Change the name (and possibly the master IP) of the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node before and on all nodes after the
    operation.

    """
    master_node = self.cfg.GetMasterNode()
    hooks_env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    return hooks_env, [master_node], self.cfg.GetNodeList()

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved, and the operation is refused if neither the
    name nor the IP changes, or if the new IP already answers on the
    network. On success C{self.op.name} holds the resolved name and
    C{self.ip} the new IP address.

    """
    resolved = utils.GetHostInfo(self.op.name)

    new_name = resolved.name
    new_ip = resolved.ip
    self.ip = new_ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()

    if (new_name, new_ip) == (old_name, old_ip):
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # a new IP which already responds is in use by some other machine
    if new_ip != old_ip and utils.TcpPing(new_ip,
                                          constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped while the configuration and the known
    hosts file are updated, and started again afterwards, even when the
    update fails.

    """
    new_cluster_name = self.op.name
    new_master_ip = self.ip

    # shutdown the master IP
    master_node = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master_node, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster_info = self.cfg.GetClusterInfo()
      cluster_info.cluster_name = new_cluster_name
      cluster_info.master_ip = new_master_ip
      self.cfg.Update(cluster_info, feedback_fn)

      # update the known hosts file and push it to all the other nodes
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      other_nodes = self.cfg.GetNodeList()
      if master_node in other_nodes:
        other_nodes.remove(master_node)
      upload_results = self.rpc.call_upload_file(
        other_nodes, constants.SSH_KNOWN_HOSTS_FILE)
      for target, target_result in upload_results.iteritems():
        failure = target_result.fail_msg
        if not failure:
          continue
        self.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                             (constants.SSH_KNOWN_HOSTS_FILE, target,
                              failure))

    finally:
      start_result = self.rpc.call_node_start_master(master_node, False,
                                                     False)
      start_failure = start_result.fail_msg
      if start_failure:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s",
                        start_failure)
2227
2228
def _RecursiveCheckIfLVMBased(disk):
  """Tell whether a disk, or any disk below it, is a logical volume.

  The children are examined (depth-first) before the disk itself.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  children = disk.children or []
  if any(_RecursiveCheckIfLVMBased(child) for child in children):
    return True
  return disk.dev_type == constants.LD_LV
2243
2244
class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    Normalizes C{candidate_pool_size} to an integer (or None when not
    given) and validates the uid pool related arguments.

    @raise errors.OpPrereqError: if the candidate pool size is not an
        integer or is smaller than one

    """
    if not hasattr(self.op, "candidate_pool_size"):
      self.op.candidate_pool_size = None
    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err), errors.ECODE_INVAL)
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed",
                                   errors.ECODE_INVAL)

    _CheckBooleanOpField(self.op, "maintain_node_health")

    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    """Request shared locks on all the nodes.

    """
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks run on the master node only, in both phases.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    The new values are computed here (C{self.new_beparams},
    C{self.new_nicparams}, C{self.new_hvparams}, C{self.new_os_hvp} and
    C{self.hv_list}) and only written to the configuration in L{Exec}.

    @raise errors.OpPrereqError: if any of the new parameters is invalid
        or conflicts with the current state of the cluster

    """
    # an empty (but not None) volume group name means disabling LVM
    # storage, which is not possible while lvm-based instances exist
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist",
                                       errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = objects.FillDict(
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = objects.FillDict(
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed nic with no ip" %
                              (instance.name, nic_idx))
      # all the problems are collected first, so that they can be reported
      # to the user in a single error
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors),
                                   errors.ECODE_INVAL)

    # hypervisor list/parameters
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
                                   errors.ECODE_INVAL)
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      if not isinstance(self.op.os_hvp, dict):
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
                                   errors.ECODE_INVAL)
      for os_name, hvs in self.op.os_hvp.items():
        if not isinstance(hvs, dict):
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
                                      " input"), errors.ECODE_INVAL)
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      if not self.hv_list:
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                                   " least one member",
                                   errors.ECODE_INVAL)
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
      if invalid_hvs:
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   " entries: %s" %
                                   utils.CommaJoin(invalid_hvs),
                                   errors.ECODE_INVAL)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    Applies the values computed in L{CheckPrereq} to the cluster object
    and saves it in the configuration.

    @param feedback_fn: function used to send feedback back to the caller

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      # an empty name is stored as None
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    # a full uid pool replaces the result of the additions/removals above
    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    self.cfg.Update(self.cluster, feedback_fn)
2485
2486
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Copy the ancillary cluster files to the other nodes.

  ConfigWriter takes care of distributing the config and ssconf files;
  this function handles the remaining files: the fixed list below plus
  whatever the enabled hypervisors return from GetAncillaryFiles(). Only
  files which exist locally are uploaded, and a failed upload is merely
  logged as a warning.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # Targets: online nodes plus the additional ones, without the master
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  targets = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    targets.extend(additional_nodes)
  if master_info.name in targets:
    targets.remove(master_info.name)

  # Files: the static set, extended by the per-hypervisor files
  files = set([constants.ETC_HOSTS,
               constants.SSH_KNOWN_HOSTS_FILE,
               constants.RAPI_CERT_FILE,
               constants.RAPI_USERS_FILE,
               constants.CONFD_HMAC_KEY,
              ])

  for hv_name in lu.cfg.GetClusterInfo().enabled_hypervisors:
    files.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Upload, skipping the files which are not present on this node
  for fname in files:
    if not os.path.exists(fname):
      continue
    upload = lu.rpc.call_upload_file(targets, fname)
    for to_node, to_result in upload.items():
      err = to_result.fail_msg
      if not err:
        continue
      lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                         (fname, to_node, err))
2529
2530
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  The LU takes no opcode parameters: it re-saves the cluster object and
  then pushes out the ancillary files.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node locks, in shared mode.

    """
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to verify for this LU.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: function used to send feedback back to the caller

    """
    cluster_info = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster_info, feedback_fn)
    _RedistributeAncillaryFiles(self)
2557
2558
def _WaitForSync(lu, instance, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  The mirror status of all the instance's disks is requested from the
  primary node, repeatedly, until no disk reports a sync percentage any
  more (or just once if oneshot is set).

  @param lu: the calling logical unit; its cfg, rpc, proc and LogWarning
      members are used
  @param instance: the instance whose disks should be checked
  @param oneshot: if true, do not wait for the sync to complete
  @rtype: boolean
  @return: False if, at the last poll, some disk was degraded without
      being in the process of syncing; True otherwise (also for an
      instance without disks)
  @raise errors.RemoteError: if the node failed to answer ten times in a
      row

  """
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    # a successful answer resets the count of consecutive failures
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, instance.disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
          # remember the longest estimate over all disks, not just the
          # estimate of whichever disk happens to be reported last
          max_time = max(max_time, mstat.estimated_time)
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, mstat.sync_percent,
                         rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    # wait at least one second (so that a missing or zero estimate does
    # not turn this into a busy loop) and at most one minute
    time.sleep(min(60, max(1, max_time)))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
2630
2631
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Verify that a disk and its children are in a healthy state.

  By default the is_degraded attribute (which represents overall non-ok
  status for the device(s)) is tested; if ldisk is true, the local
  storage status (ldisk_status) of the device is compared against
  constants.LDS_OKAY instead. The children are always checked with the
  default test.

  @param lu: the calling logical unit
  @param dev: the disk object to check
  @param node: the node on which the check is done
  @param on_primary: whether the node is the primary one; on other nodes
      the device is only queried if dev.AssembleOnSecondary() is true
  @param ldisk: whether to test the local storage status of dev
  @rtype: boolean
  @return: True if no problem was found, False otherwise

  """
  lu.cfg.SetDiskID(dev, node)

  if on_primary or dev.AssembleOnSecondary():
    found = lu.rpc.call_blockdev_find(node, dev)
    err = found.fail_msg
    if err:
      lu.LogWarning("Can't find disk on node %s: %s", node, err)
      return False

    status = found.payload
    if not status:
      lu.LogWarning("Can't find disk on node %s", node)
      return False

    if ldisk:
      healthy = status.ldisk_status == constants.LDS_OKAY
    else:
      healthy = not status.is_degraded
    if not healthy:
      return False

  # the first failing child ends the check
  children = dev.children
  if children:
    for child in children:
      if not _CheckDiskConsistency(lu, child, node, on_primary):
        return False

  return True
2664
2665
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    """Validate the opcode arguments and declare the needed locks.

    @raise errors.OpPrereqError: if specific OS names were requested

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to verify for this LU.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and RPC results as values;
        the payload of each result is iterated as a sequence of
        (name, path, status, diagnose, variants) entries

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and lists of (path, status, diagnose, variants)
        tuples as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", []),
                                     (/srv/..., False, "invalid api", [])],
                           "node2": [(/srv/..., True, "", [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # first time we see this OS: start with an empty list for
          # every node which answered the RPC
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    @param feedback_fn: function used to send feedback back to the caller
    @rtype: list
    @return: one row per OS name, each holding the values of the
        requested output fields in order

    """
    valid_nodes = list(self.cfg.GetOnlineNodeList())
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        valid = True
        variants = None
        for osl in os_data.values():
          # Force a real boolean: a plain "and" chain would otherwise hand
          # an empty list (node without this OS) or the raw status value
          # to the caller as the "valid" field
          valid = bool(valid and osl and osl[0][1])
          if not valid:
            variants = None
            break
          if calc_variants:
            # keep only the variants of the first entry on every node
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
2779
2780
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The node being removed is left out of both returned node lists, as
    a failed node would otherwise be impossible to remove; the post hooks
    for it are triggered explicitly from Exec.

    """
    target = self.op.node_name
    hook_nodes = self.cfg.GetNodeList()
    if target in hook_nodes:
      hook_nodes.remove(target)
    else:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", target)

    env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    The following conditions are verified:
     - the node is known to the configuration
     - the node is not the master
     - no instance uses the node (as primary or secondary)

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    target = self.cfg.GetNodeInfo(self.op.node_name)
    assert target is not None

    instance_names = self.cfg.GetInstanceList()

    if target.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for iname in instance_names:
      if target.name in self.cfg.GetInstanceInfo(iname).all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % iname,
                                   errors.ECODE_INVAL)

    self.op.node_name = target.name
    self.node = target

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    Problems while running the post hooks or while asking the node to
    leave the cluster are only reported as warnings.

    """
    victim = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 victim.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[victim.name])
    self.context.RemoveNode(victim.name)

    # Run post hooks on the node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [victim.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % victim.name)

    leave_result = self.rpc.call_node_leave_cluster(victim.name,
                                                    modify_ssh_setup)
    if leave_result.fail_msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_result.fail_msg)
2867
2868
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  # fields read directly as attributes of the node object
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  # fields taken from the node_info RPC answer
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role",
    *_SIMPLE_FIELDS)

  def ExpandNames(self):
    """Validate the requested fields and compute the needed locks.

    Node locks are requested only if fields outside of the static set
    are selected and the opcode asks for locking.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(self, self.op.names)

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  @staticmethod
  def _NodeRole(node, master_node):
    """Return the one-letter role code of a node.

    @param node: the node object
    @param master_node: the name of the master node
    @return: "M" for the master, otherwise "C", "D" or "O" for the first
        set flag among master_candidate, drained and offline, and "R" if
        none is set

    """
    if node.name == master_node:
      return "M"
    if node.master_candidate:
      return "C"
    if node.drained:
      return "D"
    if node.offline:
      return "O"
    return "R"

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    @return: a list with one row per node (in NiceSort order of the node
        names), each row holding the values of the requested output
        fields in order

    """
    cfg_nodes = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      names = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      names = self.wanted
      gone = set(names).difference(cfg_nodes.keys())
      if gone:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % gone)
    else:
      names = cfg_nodes.keys()

    names = utils.NiceSort(names)
    node_objs = [cfg_nodes[name] for name in names]

    # begin data gathering

    live_data = {}
    if self.do_node_query:
      # output field name -> key in the RPC payload, for integer values
      int_fields = (
        ("mtotal", 'memory_total'),
        ("mnode", 'memory_dom0'),
        ("mfree", 'memory_free'),
        ("dtotal", 'vg_size'),
        ("dfree", 'vg_free'),
        ("ctotal", 'cpu_total'),
        ("cnodes", 'cpu_nodes'),
        ("csockets", 'cpu_sockets'),
        )
      rpc_answers = self.rpc.call_node_info(names, self.cfg.GetVGName(),
                                            self.cfg.GetHypervisorType())
      for name in names:
        answer = rpc_answers[name]
        if answer.fail_msg or not answer.payload:
          # no usable data from this node
          live_data[name] = {}
          continue
        payload = answer.payload
        entry = {"bootid": payload.get('bootid', None)}
        for out_field, key in int_fields:
          entry[out_field] = utils.TryConvert(int, payload.get(key, None))
        live_data[name] = entry
    else:
      # nothing was queried; all nodes share the same empty mapping
      no_data = {}
      for name in names:
        live_data[name] = no_data

    node_to_primary = {}
    node_to_secondary = {}
    for name in names:
      node_to_primary[name] = set()
      node_to_secondary[name] = set()

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields.intersection(self.op.output_fields):
      for inst in self.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in node_objs:
      row = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          val = self._NodeRole(node, master_node)
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
3028
3029
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    """Validate the requested fields and lock the nodes in shared mode.

    Without an explicit node list, all nodes are locked.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This only records the nodes whose locks have been acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the selected nodes.

    Offline nodes are skipped silently; nodes whose RPC call failed are
    skipped with a warning.

    @return: a list with one row per volume, each row holding the
        requested output fields, converted to strings, in order

    """
    node_names = self.nodes
    vol_data = self.rpc.call_node_volumes(node_names)

    instances = [self.cfg.GetInstanceInfo(iname)
                 for iname in self.cfg.GetInstanceList()]

    # instance object -> result of its MapLVsByNode()
    lvs_of = {}
    for inst in instances:
      lvs_of[inst] = inst.MapLVsByNode()

    rows = []
    for node in node_names:
      answer = vol_data[node]
      if answer.offline:
        continue
      if answer.fail_msg:
        self.LogWarning("Can't compute volume data on node %s: %s",
                        node, answer.fail_msg)
        continue

      for vol in sorted(answer.payload, key=lambda item: item['dev']):
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            # first instance having this volume on this node, if any
            val = '-'
            for inst in instances:
              inst_lvs = lvs_of[inst]
              if node in inst_lvs and vol['name'] in inst_lvs[node]:
                val = inst.name
                break
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        rows.append(row)

    return rows
3114
3115
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def CheckArguments(self):
    """Validate the storage type and the requested output fields.

    """
    _CheckStorageType(self.op.storage_type)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Lock the requested nodes (or all of them) in shared mode.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      node_locks = locking.ALL_SET
    else:
      node_locks = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def CheckPrereq(self):
    """Check prerequisites.

    This defaults the optional "name" opcode attribute to None and
    records the nodes whose locks have been acquired.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes are skipped silently; nodes whose RPC call failed are
    skipped with a warning.

    @return: a list of rows, ordered by node and then by storage unit
        name, each holding the requested output fields in order

    """
    requested = self.op.output_fields

    # Always get name to sort by
    if constants.SF_NAME in requested:
      fields = requested[:]
    else:
      fields = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [fname for fname in fields if fname not in lu_only]

    # position of every queried field in the rows returned by the nodes
    field_idx = {}
    for idx, fname in enumerate(fields):
      field_idx[fname] = idx
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      answer = data[node]
      if answer.offline:
        continue

      if answer.fail_msg:
        self.LogWarning("Can't get storage data from node %s: %s",
                        node, answer.fail_msg)
        continue

      by_name = {}
      for row in answer.payload:
        by_name[row[name_idx]] = row

      for unit_name in utils.NiceSort(by_name.keys()):
        row = by_name[unit_name]

        out = []
        for field in requested:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(row[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
3208
3209
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the storage type.

    """
    # Store the expanded name back on the opcode: ExpandNames and Exec
    # read self.op.node_name for the node lock and for the RPC call
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

  def ExpandNames(self):
    """Request the lock of the target node.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This verifies that the storage type is modifiable at all and that
    only modifiable fields are being changed.

    @raise errors.OpPrereqError: if either check fails

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Apply the requested changes to the storage unit.

    The changes are sent to the node via the storage_modify RPC call; a
    failed call is raised through the result's Raise method.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
3257
3258
3259 class LUAddNode(LogicalUnit):
3260   """Logical unit for adding node to the cluster.
3261
3262   """
3263   HPATH = "node-add"
3264   HTYPE = constants.HTYPE_NODE
3265   _OP_REQP = ["node_name"]
3266
3267   def CheckArguments(self):
3268     # validate/normalize the node name
3269     self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3270
3271   def BuildHooksEnv(self):
3272     """Build hooks env.
3273
3274     This will run on all nodes before, and on all nodes + the new node after.
3275
3276     """
3277     env = {
3278       "OP_TARGET": self.op.node_name,
3279       "NODE_NAME": self.op.node_name,
3280       "NODE_PIP": self.op.primary_ip,
3281       "NODE_SIP": self.op.secondary_ip,
3282       }
3283     nodes_0 = self.cfg.GetNodeList()
3284     nodes_1 = nodes_0 + [self.op.node_name, ]
3285     return env, nodes_0, nodes_1
3286
3287   def CheckPrereq(self):
3288     """Check prerequisites.
3289
3290     This checks:
3291      - the new node is not already in the config
3292      - it is resolvable
3293      - its parameters (single/dual homed) matches the cluster
3294
3295     Any errors are signaled by raising errors.OpPrereqError.
3296
3297     """
3298     node_name = self.op.node_name
3299     cfg = self.cfg
3300
3301     dns_data = utils.GetHostInfo(node_name)
3302
3303     node = dns_data.name
3304     primary_ip = self.op.primary_ip = dns_data.ip
3305     secondary_ip = getattr(self.op, "secondary_ip", None)
3306     if secondary_ip is None:
3307       secondary_ip = primary_ip
3308     if not utils.IsValidIP(secondary_ip):
3309       raise errors.OpPrereqError("Invalid secondary IP given",
3310                                  errors.ECODE_INVAL)
3311     self.op.secondary_ip = secondary_ip
3312
3313     node_list = cfg.GetNodeList()
3314     if not self.op.readd and node in node_list:
3315       raise errors.OpPrereqError("Node %s is already in the configuration" %
3316                                  node, errors.ECODE_EXISTS)
3317     elif self.op.readd and node not in node_list:
3318       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3319                                  errors.ECODE_NOENT)
3320
3321     for existing_node_name in node_list:
3322       existing_node = cfg.GetNodeInfo(existing_node_name)
3323
3324       if self.op.readd and node == existing_node_name:
3325         if (existing_node.primary_ip != primary_ip or
3326             existing_node.secondary_ip != secondary_ip):
3327           raise errors.OpPrereqError("Readded node doesn't have the same IP"
3328                                      " address configuration as before",
3329                                      errors.ECODE_INVAL)
3330         continue
3331
3332       if (existing_node.primary_ip == primary_ip or
3333           existing_node.secondary_ip == primary_ip or
3334           existing_node.primary_ip == secondary_ip or
3335           existing_node.secondary_ip == secondary_ip):
3336         raise errors.OpPrereqError("New node ip address(es) conflict with"
3337                                    " existing node %s" % existing_node.name,
3338                                    errors.ECODE_NOTUNIQUE)
3339
3340     # check that the type of the node (single versus dual homed) is the
3341     # same as for the master
3342     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3343     master_singlehomed = myself.secondary_ip == myself.primary_ip
3344     newbie_singlehomed = secondary_ip == primary_ip
3345     if master_singlehomed != newbie_singlehomed:
3346       if master_singlehomed:
3347         raise errors.OpPrereqError("The master has no private ip but the"
3348                                    " new node has one",
3349                                    errors.ECODE_INVAL)
3350       else:
3351         raise errors.OpPrereqError("The master has a private ip but the"
3352                                    " new node doesn't have one",
3353                                    errors.ECODE_INVAL)
3354
3355     # checks reachability
3356     if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3357       raise errors.OpPrereqError("Node not reachable by ping",
3358                                  errors.ECODE_ENVIRON)
3359
3360     if not newbie_singlehomed:
3361       # check reachability from my secondary ip to newbie's secondary ip
3362       if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3363                            source=myself.secondary_ip):
3364         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3365                                    " based ping to noded port",
3366                                    errors.ECODE_ENVIRON)
3367
3368     if self.op.readd:
3369       exceptions = [node]
3370     else:
3371       exceptions = []
3372
3373     self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3374
3375     if self.op.readd:
3376       self.new_node = self.cfg.GetNodeInfo(node)
3377       assert self.new_node is not None, "Can't retrieve locked node %s" % node
3378     else:
3379       self.new_node = objects.Node(name=node,
3380                                    primary_ip=primary_ip,
3381                                    secondary_ip=secondary_ip,
3382                                    master_candidate=self.master_candidate,
3383                                    offline=False, drained=False)
3384
  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    The steps are executed strictly in this order:
      - for re-adds, reset the offline/drained flags and set the
        master candidate flag computed in CheckPrereq
      - check that the node answers the version RPC with our protocol
        version
      - if the cluster has C{modify_ssh_setup} set, send the host and
        user ssh key files to the node
      - if the cluster has C{modify_etc_hosts} set, add the node to
        /etc/hosts
      - for dual-homed nodes, ask the node whether it owns the secondary ip
      - run a node-verify (node list check) from the master against the
        new node
      - finally redistribute the ancillary files and register the node
        in the context (C{ReaddNode} or C{AddNode})

    @param feedback_fn: function used to send feedback back to the caller
    @raise errors.OpExecError: on protocol version mismatch, if the node
        denies owning the secondary ip, or if the ssh/hostname
        verification fails

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      # the order of this list fixes the positional arguments passed to
      # call_node_add below
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    # dual-homed node: have the node itself confirm it owns the secondary ip
    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      # NOTE(review): prereq=True is passed although we are in Exec;
      # presumably this selects the prerequisite error type - confirm
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    # the verification is run from the master only, against the new node
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      # a non-empty payload maps each failed node to its error message
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        # a failed demotion is only warned about, it doesn't abort the re-add
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())
3481
3482
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  The flags handled are C{offline}, C{drained} and C{master_candidate};
  each opcode field is tri-state: C{None} means "leave unchanged",
  C{True}/C{False} request the respective state.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the opcode fields and precompute helper flags.

    At least one of offline/master_candidate/drained must be given, and
    at most one of them may be set to True. The attributes
    C{offline_or_drain}, C{deoffline_or_drain}, C{might_demote} and
    C{lock_all} are computed here for use by the later phases.

    @raise errors.OpPrereqError: if no modification, or more than one
        target state, was requested

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    _CheckBooleanOpField(self.op, 'auto_promote')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    # True if at least one of offline/drained is explicitly being cleared
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    # offlining/draining auto-demotes a master candidate (see Exec), so it
    # counts as a possible demotion just like an explicit one
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    # all node locks are only taken if a demotion may happen and the user
    # allowed auto-promotion
    self.lock_all = self.op.auto_promote and self.might_demote


  def ExpandNames(self):
    """Lock either all nodes (see C{lock_all}) or only the target node.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list returned for both hook phases contains the master
    node and the node being modified.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that:
      - the target is not the master node
      - without locks on all nodes, a possible demotion doesn't leave
        the cluster with too few master candidates
      - a node that stays offline or drained is not promoted to
        master candidate

    If the node is being de-offlined/de-drained and is not a master
    candidate, C{_DecideSelfPromotion} decides whether to promote it.

    @raise errors.OpPrereqError: if any of the above checks fails

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    # NOTE(review): CheckArguments already rejects opcodes with all three
    # fields unset, so this condition looks always true here - confirm
    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",
                                   errors.ECODE_INVAL)


    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    # promotion is refused while the node is offline/drained, unless the
    # same opcode explicitly clears that flag
    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    The flags are applied in the order offline, master_candidate,
    drained; setting offline or drained to True also demotes a master
    candidate and clears the other of the two flags.

    @param feedback_fn: passed on to the configuration update
    @rtype: list
    @return: list of (parameter name, description of the change) tuples

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        # unlike in the drain case below, no demotion RPC is sent here
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        # a failed demotion RPC is only warned about
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result
3644
3645
class LUPowercycleNode(NoHooksLU):
  """Logical unit scheduling the powercycle of a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and protect the master node.

    @raise errors.OpPrereqError: if the target is the master node and
        the force parameter is not set

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    targets_master = self.op.node_name == self.cfg.GetMasterNode()
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    No locks are acquired: powercycling is a last-resort action which
    must not wait for other jobs.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing is checked here, this LU has no prerequisites.

    """

  def Exec(self, feedback_fn):
    """Ask the node to powercycle itself.

    @return: the payload of the powercycle RPC

    """
    hv_type = self.cfg.GetHypervisorType()
    powercycle = self.rpc.call_node_powercycle(self.op.node_name, hv_type)
    powercycle.Raise("Failed to schedule the reboot")
    return powercycle.payload
3685
3686
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """No locks are needed for this query.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """

  def Exec(self, feedback_fn):
    """Return cluster config.

    Hypervisor-related data (C{hvparams} and C{os_hvp}) is restricted to
    the enabled hypervisors.

    @rtype: dict
    @return: dictionary with version constants, architecture
        information and cluster-level settings

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Filter just for enabled hypervisors
    os_hvp = {}
    for os_name, per_hv in cluster.os_hvp.items():
      os_hvp[os_name] = dict((hv_name, hv_params)
                             for hv_name, hv_params in per_hv.items()
                             if hv_name in enabled_hvs)

    hvparams = dict((hv_name, cluster.hvparams[hv_name])
                    for hv_name in enabled_hvs)

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      }

    # cluster attributes which are exported under their own name
    for attr in ("beparams", "nicparams", "candidate_pool_size",
                 "master_netdev", "volume_group_name", "file_storage_dir",
                 "maintain_node_health", "ctime", "mtime", "uuid"):
      result[attr] = getattr(cluster, attr)

    result["tags"] = list(cluster.GetTags())
    result["uid_pool"] = cluster.uid_pool

    return result
3746
3747
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    """Take no locks and validate the requested output fields.

    """
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """

  def _ComputeField(self, field):
    """Compute the value of a single output field.

    @param field: the name of the requested field
    @raise errors.ParameterError: for unknown field names

    """
    if field == "cluster_name":
      return self.cfg.GetClusterName()
    if field == "master_node":
      return self.cfg.GetMasterNode()
    if field == "drain_flag":
      return os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
    if field == "watcher_pause":
      return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Compute the values of the requested fields.

    @rtype: list
    @return: one value per entry in C{self.op.output_fields}, in the
        same order

    """
    return [self._ComputeField(field) for field in self.op.output_fields]
3789
3790
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance is looked up in the configuration, its primary node
    must be online, and C{ignore_size} defaults to False if the opcode
    doesn't carry it.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by
        L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be activated

    """
    (activated, device_info) = _AssembleInstanceDisks(
      self, self.instance, ignore_size=self.op.ignore_size)
    if activated:
      return device_info

    raise errors.OpExecError("Cannot activate block devices")
3831
3832
3833 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3834                            ignore_size=False):
3835   """Prepare the block devices for an instance.
3836
3837   This sets up the block devices on all nodes.
3838
3839   @type lu: L{LogicalUnit}
3840   @param lu: the logical unit on whose behalf we execute
3841   @type instance: L{objects.Instance}
3842   @param instance: the instance for whose disks we assemble
3843   @type ignore_secondaries: boolean
3844   @param ignore_secondaries: if true, errors on secondary nodes
3845       won't result in an error return from the function
3846   @type ignore_size: boolean
3847   @param ignore_size: if true, the current known size of the disk
3848       will not be used during the disk activation, useful for cases
3849       when the size is wrong
3850   @return: False if the operation failed, otherwise a list of
3851       (host, instance_visible_name, node_visible_name)
3852       with the mapping from node devices to instance devices
3853
3854   """
3855   device_info = []
3856   disks_ok = True
3857   iname = instance.name
3858   # With the two passes mechanism we try to reduce the window of
3859   # opportunity for the race condition of switching DRBD to primary
3860   # before handshaking occured, but we do not eliminate it
3861
3862   # The proper fix would be to wait (with some limits) until the
3863   # connection has been made and drbd transitions from WFConnection
3864   # into any other network-connected state (Connected, SyncTarget,
3865   # SyncSource, etc.)
3866
3867   # 1st pass, assemble on all nodes in secondary mode
3868   for inst_disk in instance.disks:
3869     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3870       if ignore_size:
3871         node_disk = node_disk.Copy()
3872         node_disk.UnsetSize()
3873       lu.cfg.SetDiskID(node_disk, node)
3874       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3875       msg = result.fail_msg
3876       if msg:
3877         lu.proc.LogWarning("Could not prepare block device %s on node %s"
3878                            " (is_primary=False, pass=1): %s",
3879                            inst_disk.iv_name, node, msg)
3880         if not ignore_secondaries:
3881           disks_ok = False
3882
3883   # FIXME: race condition on drbd migration to primary
3884
3885   # 2nd pass, do only the primary node
3886   for inst_disk in instance.disks:
3887     dev_path = None
3888
3889     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3890       if node != instance.primary_node:
3891         continue
3892       if ignore_size:
3893         node_disk = node_disk.Copy()
3894         node_disk.UnsetSize()
3895       lu.cfg.SetDiskID(node_disk, node)
3896       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3897       msg = result.fail_msg
3898       if msg:
3899         lu.proc.LogWarning("Could not prepare block device %s on node %s"
3900                            " (is_primary=True, pass=2): %s",
3901                            inst_disk.iv_name, node, msg)
3902         disks_ok = False
3903       else:
3904         dev_path = result.payload
3905
3906     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3907
3908   # leave the disks configured for the primary node
3909   # this is a workaround that would be fixed better by
3910   # improving the logical/physical id handling
3911   for disk in instance.disks:
3912     lu.cfg.SetDiskID(disk, instance.primary_node)
3913
3914   return disks_ok, device_info
3915
3916
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  The disks are assembled with C{force} used as the
  C{ignore_secondaries} flag. If the assembly fails the disks are shut
  down again and an error is raised; when C{force} is False (as opposed
  to None) the user is additionally hinted at the '--force' option.

  @raise errors.OpExecError: if the disks could not be assembled

  """
  (assembled, _) = _AssembleInstanceDisks(lu, instance,
                                          ignore_secondaries=force)
  if assembled:
    return

  _ShutdownInstanceDisks(lu, instance)
  if force is not None and not force:
    retry_hint = ("If the message above refers to a secondary node,"
                  " you can retry the operation using '--force'.")
    lu.proc.LogWarning("", hint=retry_hint)
  raise errors.OpExecError("Disk consistency error")
3930
3931
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance is looked up in the configuration and must exist.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    The shutdown is delegated to L{_SafeShutdownInstanceDisks}.

    """
    _SafeShutdownInstanceDisks(self, self.instance)
3964
3965
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance, if it is not running.

  L{_CheckInstanceDown} is called first (with the reason "cannot
  shutdown disks"); only afterwards L{_ShutdownInstanceDisks} is
  invoked.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are to be shut down

  """
  failure_reason = "cannot shutdown disks"
  _CheckInstanceDown(lu, instance, failure_reason)
  _ShutdownInstanceDisks(lu, instance)
3975
3976
3977 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3978   """Shutdown block devices of an instance.
3979
3980   This does the shutdown on all nodes of the instance.
3981
3982   If the ignore_primary is false, errors on the primary node are
3983   ignored.
3984
3985   """
3986   all_result = True
3987   for disk in instance.disks:
3988     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3989       lu.cfg.SetDiskID(top_disk, node)
3990       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3991       msg = result.fail_msg
3992       if msg:
3993         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3994                       disk.iv_name, node, msg)
3995         if not ignore_primary or node != instance.primary_node:
3996           all_result = False
3997   return all_result
3998
3999
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  The node is queried for its C{memory_free} value; if the query
  fails, the value is not an integer, or it is smaller than the
  requested amount, an OpPrereqError exception is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  node_result = lu.rpc.call_node_info([node], vg_name, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  free_mem = node_result.payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)

  if requested > free_mem:
    details = (node, reason, requested, free_mem)
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" % details,
                               errors.ECODE_NORES)
4035
4036
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  All given nodes are queried for their C{vg_free} value; if, for any
  node, the query fails, the value is not an integer, or it is smaller
  than the requested amount, an OpPrereqError exception is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  hv_type = lu.cfg.GetHypervisorType()
  per_node = lu.rpc.call_node_info(nodenames, vg_name, hv_type)

  for name in nodenames:
    node_result = per_node[name]
    node_result.Raise("Cannot get current information from node %s" % name,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    vg_free = node_result.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (name, vg_free),
                                 errors.ECODE_ENVIRON)

    if requested > vg_free:
      details = (name, requested, vg_free)
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 details,
                                 errors.ECODE_NORES)
4071
4072
class LUStartupInstance(LogicalUnit):
  """Logical unit that starts an instance on its primary node.

  The opcode may carry extra C{beparams}/C{hvparams}; they are validated
  in L{CheckPrereq} and passed to the start RPC in L{Exec}.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to C{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (returned twice) is the master node followed by all
    nodes of the instance.

    """
    hook_env = {"FORCE": self.op.force}
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_env, hook_nodes, hook_nodes)

  def _CheckExtraBeParams(self):
    """Validate the optional backend parameters given in the opcode.

    Stores the value in C{self.beparams}; a non-empty value must be a dict
    and is run through C{utils.ForceDictType}.

    """
    extra_be = getattr(self.op, "beparams", {})
    self.beparams = extra_be
    if not extra_be:
      return

    if not isinstance(extra_be, dict):
      raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                 " dict" % (type(extra_be), ),
                                 errors.ECODE_INVAL)
    utils.ForceDictType(extra_be, constants.BES_PARAMETER_TYPES)
    self.op.beparams = extra_be

  def _CheckExtraHvParams(self, instance):
    """Validate the optional hypervisor parameters given in the opcode.

    Stores the value in C{self.hvparams}. A non-empty value is merged over
    the cluster and instance hypervisor parameters and the result is
    checked both locally and on all nodes of the instance.

    @param instance: the instance object being started

    """
    extra_hv = getattr(self.op, "hvparams", {})
    self.hvparams = extra_hv
    if not extra_hv:
      return

    if not isinstance(extra_hv, dict):
      raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                 " dict" % (type(extra_hv), ),
                                 errors.ECODE_INVAL)

    # local syntax check on cluster defaults + instance values + overrides
    cluster = self.cfg.GetClusterInfo()
    utils.ForceDictType(extra_hv, constants.HVS_PARAMETER_TYPES)
    hv_name = instance.hypervisor
    merged_hvp = objects.FillDict(cluster.hvparams[hv_name],
                                  instance.hvparams)
    merged_hvp.update(extra_hv)
    hv_class = hypervisor.GetHypervisor(hv_name)
    hv_class.CheckParameterSyntax(merged_hvp)
    _CheckHVParams(self, instance.all_nodes, instance.hypervisor, merged_hvp)
    self.op.hvparams = extra_hv

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the instance is known, validates the extra parameters, checks
    that the primary node is online and the bridges exist and, if the
    instance is not already running, that the node has enough free memory.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self._CheckExtraBeParams()
    self._CheckExtraHvParams(instance)

    _CheckNodeOnline(self, instance.primary_node)

    filled_be = self.cfg.GetClusterInfo().FillBE(instance)
    _CheckInstanceBridgesExist(self, instance)

    running_info = self.rpc.call_instance_info(instance.primary_node,
                                               instance.name,
                                               instance.hypervisor)
    running_info.Raise("Error checking node %s" % instance.primary_node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
    if running_info.payload:
      # a non-empty payload means the instance is running already
      return

    _CheckNodeFreeMemory(self, instance.primary_node,
                         "starting instance %s" % instance.name,
                         filled_be[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked up in the configuration before its disks are
    activated and the start RPC is issued; if the start fails the disks
    are shut down again and L{errors.OpExecError} is raised.

    """
    inst = self.instance
    force_start = self.op.force

    self.cfg.MarkInstanceUp(inst.name)

    pnode = inst.primary_node
    _StartInstanceDisks(self, inst, force_start)

    start_result = self.rpc.call_instance_start(pnode, inst,
                                                self.hvparams, self.beparams)
    failure = start_result.fail_msg
    if not failure:
      return

    _ShutdownInstanceDisks(self, inst)
    raise errors.OpExecError("Could not start instance: %s" % failure)
4173
4174
class LURebootInstance(LogicalUnit):
  """Logical unit that reboots an instance.

  Soft and hard reboots are forwarded to the node via the reboot RPC; a
  full reboot is done as shutdown, disk restart and start.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Fills in C{self.shutdown_timeout}, falling back to
    C{constants.DEFAULT_SHUTDOWN_TIMEOUT} when the opcode has none.

    """
    fallback = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout", fallback)

  def ExpandNames(self):
    """Validate the reboot type, then expand and lock the instance.

    @raise errors.ParameterError: if the reboot type is not one of the
        soft, hard or full constants

    """
    known_types = (constants.INSTANCE_REBOOT_SOFT,
                   constants.INSTANCE_REBOOT_HARD,
                   constants.INSTANCE_REBOOT_FULL)
    if self.op.reboot_type not in known_types:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  known_types)
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (returned twice) is the master node followed by all
    nodes of the instance.

    """
    hook_env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_env, hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the instance is known, its primary node is online and its
    bridges exist.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceBridgesExist(self, instance)

  def _RebootInPlace(self, inst, pnode, reboot_type):
    """Run a soft or hard reboot through the reboot RPC.

    """
    for disk in inst.disks:
      self.cfg.SetDiskID(disk, pnode)
    reboot_result = self.rpc.call_instance_reboot(pnode, inst, reboot_type,
                                                  self.shutdown_timeout)
    reboot_result.Raise("Could not reboot instance")

  def _RebootFull(self, inst, pnode, ignore_secondaries):
    """Run a full reboot: shutdown, restart the disks, start again.

    If the final start fails the disks are shut down again before
    L{errors.OpExecError} is raised.

    """
    stop_result = self.rpc.call_instance_shutdown(pnode, inst,
                                                  self.shutdown_timeout)
    stop_result.Raise("Could not shutdown instance for full reboot")
    _ShutdownInstanceDisks(self, inst)
    _StartInstanceDisks(self, inst, ignore_secondaries)
    start_result = self.rpc.call_instance_start(pnode, inst, None, None)
    failure = start_result.fail_msg
    if failure:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Could not start instance for"
                               " full reboot: %s" % failure)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    After a successful reboot of any type the instance is marked up in
    the configuration.

    """
    inst = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type
    pnode = inst.primary_node

    in_place_types = (constants.INSTANCE_REBOOT_SOFT,
                      constants.INSTANCE_REBOOT_HARD)
    if reboot_type in in_place_types:
      self._RebootInPlace(inst, pnode, reboot_type)
    else:
      self._RebootFull(inst, pnode, ignore_secondaries)

    self.cfg.MarkInstanceUp(inst.name)
4263
4264
class LUShutdownInstance(LogicalUnit):
  """Logical unit that shuts down an instance and its disks.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Fills in C{self.timeout}, falling back to
    C{constants.DEFAULT_SHUTDOWN_TIMEOUT} when the opcode has none.

    """
    fallback = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.timeout = getattr(self.op, "timeout", fallback)

  def ExpandNames(self):
    """Delegate name expansion and locking to C{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (returned twice) is the master node followed by all
    nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["TIMEOUT"] = self.timeout
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_env, hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the instance is known and its primary node is online.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked down in the configuration first. A failing
    shutdown RPC only produces a warning; the disks are shut down in
    either case.

    """
    inst = self.instance
    pnode = inst.primary_node
    stop_timeout = self.timeout

    self.cfg.MarkInstanceDown(inst.name)
    stop_result = self.rpc.call_instance_shutdown(pnode, inst, stop_timeout)
    failure = stop_result.fail_msg
    if failure:
      self.proc.LogWarning("Could not shutdown instance: %s" % failure)

    _ShutdownInstanceDisks(self, inst)
4320
4321
class LUReinstallInstance(LogicalUnit):
  """Logical unit that re-runs the OS creation scripts for an instance.

  The opcode may optionally name a new OS type (C{os_type}), which is
  then stored in the configuration before the scripts are run.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to C{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (returned twice) is the master node followed by all
    nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_env, hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the instance is known, has disks and is down, that its
    primary node is online and, when a new OS is requested, that the
    primary node has that OS.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot reinstall")

    # normalise the optional opcode fields so Exec can rely on them
    requested_os = getattr(self.op, "os_type", None)
    self.op.os_type = requested_os
    force_variant = getattr(self.op, "force_variant", False)
    self.op.force_variant = force_variant

    if requested_os is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, inst.primary_node)
      _CheckNodeHasOS(self, pnode, requested_os, force_variant)

    self.instance = inst

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    The disks are activated for the duration of the OS scripts and are
    always shut down afterwards, also when the scripts fail.

    """
    inst = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      inst.os = new_os
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      os_result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                                self.op.debug_level)
      os_result.Raise("Could not install OS for instance %s on node %s" %
                      (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
4391
4392
class LURecreateInstanceDisks(LogicalUnit):
  """Logical unit that recreates (some of) the disks of an instance.

  C{op.disks} is a list of disk indices; an empty list selects all disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    C{op.disks} must be a list of non-negative integers.

    """
    wanted = self.op.disks
    if not isinstance(wanted, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for item in wanted:
      if isinstance(item, int) and item >= 0:
        continue
      raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                 str(item), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Delegate name expansion and locking to C{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (returned twice) is the master node followed by all
    nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_env, hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the instance is known, has disks and is down, that its
    primary node is online and that every requested disk index exists.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot recreate disks")

    if self.op.disks:
      for idx in self.op.disks:
        if idx >= len(inst.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)
    else:
      # nothing selected explicitly: operate on every disk
      self.op.disks = range(len(inst.disks))

    self.instance = inst

  def Exec(self, feedback_fn):
    """Recreate the disks.

    Every disk index that was not requested is passed to C{_CreateDisks}
    in its C{to_skip} list.

    """
    requested = self.op.disks
    skipped = [idx for idx, _ in enumerate(self.instance.disks)
               if idx not in requested]

    _CreateDisks(self, self.instance, to_skip=skipped)
4464
4465
class LURenameInstance(LogicalUnit):
  """Logical unit that renames an instance.

  C{REQ_BGL} is not overridden here; L{Exec} swaps the instance lock and
  relies on the Big Ganeti Lock being held while doing so.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list (returned twice) is the master node followed by all
    nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["INSTANCE_NEW_NAME"] = self.op.new_name
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_env, hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the instance is known and down and its primary node online,
    resolves the new name, and checks that the new name is not already
    used by an instance and (unless C{ignore_ip} is set) that its IP does
    not answer on the node daemon port.

    """
    full_name = _ExpandInstanceName(self.cfg, self.op.instance_name)
    self.op.instance_name = full_name
    inst = self.cfg.GetInstanceInfo(full_name)
    assert inst is not None
    _CheckNodeOnline(self, inst.primary_node)
    _CheckInstanceDown(self, inst, "cannot rename")
    self.instance = inst

    # new name verification
    host_info = utils.GetHostInfo(self.op.new_name)
    resolved_name = host_info.name
    self.op.new_name = resolved_name

    if resolved_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 resolved_name, errors.ECODE_EXISTS)

    if getattr(self.op, "ignore_ip", False):
      return

    if utils.TcpPing(host_info.ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                 (host_info.ip, resolved_name),
                                 errors.ECODE_NOTUNIQUE)

  @staticmethod
  def _FileStorageDir(inst):
    """Return the directory holding the first disk of a file-based instance.

    """
    return os.path.dirname(inst.disks[0].logical_id[1])

  def Exec(self, feedback_fn):
    """Rename the instance.

    The configuration and the instance lock are updated first; for
    file-based instances the storage directory is then renamed on the
    primary node, and finally the OS rename script is run with the disks
    activated. A failing OS rename script only produces a warning.

    """
    old_inst = self.instance
    old_name = old_inst.name
    new_name = self.op.new_name

    if old_inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = self._FileStorageDir(old_inst)

    self.cfg.RenameInstance(old_name, new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    lock_manager = self.context.glm
    lock_manager.remove(locking.LEVEL_INSTANCE, old_name)
    lock_manager.add(locking.LEVEL_INSTANCE, new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = self._FileStorageDir(inst)
      dir_result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                         old_file_storage_dir,
                                                         new_file_storage_dir)
      dir_result.Raise("Could not rename on node %s directory '%s' to '%s'"
                       " (but the instance has been renamed in Ganeti)" %
                       (inst.primary_node, old_file_storage_dir,
                        new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      script_result = self.rpc.call_instance_run_rename(inst.primary_node,
                                                        inst, old_name,
                                                        self.op.debug_level)
      failure = script_result.fail_msg
      if failure:
        warning = ("Could not run OS rename script for instance %s on node %s"
                   " (but the instance has been renamed in Ganeti): %s" %
                   (inst.name, inst.primary_node, failure))
        self.proc.LogWarning(warning)
    finally:
      _ShutdownInstanceDisks(self, inst)
4555
4556
class LURemoveInstance(LogicalUnit):
  """Logical unit that removes an instance from the cluster.

  With C{op.ignore_failures} set, shutdown and disk removal errors are
  reported as warnings instead of aborting the removal.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Fills in C{self.shutdown_timeout}, falling back to
    C{constants.DEFAULT_SHUTDOWN_TIMEOUT} when the opcode has none.

    """
    fallback = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout", fallback)

  def ExpandNames(self):
    """Lock the instance and request recalculated node-level locks.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The first node list holds only the master node; the second holds all
    nodes of the instance followed by the master node.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    master_only = [self.cfg.GetMasterNode()]
    with_instance_nodes = list(self.instance.all_nodes) + master_only
    return (hook_env, master_only, with_instance_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the instance is known to the configuration.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    Shuts the instance down, removes its disks, drops it from the
    configuration and schedules its lock for removal.

    @raise errors.OpExecError: if shutdown or disk removal fails and
        C{op.ignore_failures} is not set

    """
    inst = self.instance
    logging.info("Shutting down instance %s on node %s",
                 inst.name, inst.primary_node)

    stop_result = self.rpc.call_instance_shutdown(inst.primary_node, inst,
                                                  self.shutdown_timeout)
    failure = stop_result.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    logging.info("Removing block devices for instance %s", inst.name)

    disks_removed = _RemoveDisks(self, inst)
    if not disks_removed:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Can't remove instance's disks")
      feedback_fn("Warning: can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", inst.name)

    self.cfg.RemoveInstance(inst.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = inst.name
4635
4636
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  L{Exec} returns one row per instance; each row holds the values of the
  fields named in C{op.output_fields}, in that order.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  # fields read straight off the instance object via getattr() in Exec
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  # fields answered from configuration data only; the entries containing
  # groups are presumably regular expressions (Exec uses the match groups
  # to select the disk/nic parameter and index)
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  # fields computed in Exec from the live data reported by the nodes
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")


  def ExpandNames(self):
    """Validate the requested fields and declare the needed locks.

    Locks (shared, at instance and node level) are only requested when a
    non-static field was asked for and the opcode has C{use_locking} set.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    # a node query is needed as soon as any requested field is not static
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks, but only when locking was requested.

    """
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing is checked for this logical unit.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    @return: a list with one entry per instance, each entry being the list
        of values for the fields in C{op.output_fields}
    @raise errors.OpExecError: if explicitly requested instances are no
        longer present when their data is retrieved

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    # only primary nodes are queried for live instance data
    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    # bad_nodes: nodes whose RPC result carries a failure message
    # off_nodes: nodes whose RPC result is flagged as offline
    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      # no dynamic field requested: use an empty entry for every instance
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      # parameters with the cluster-level values filled in
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      # NOTE: the order of the branches below matters; the exact-name
      # checks come before the prefix and pattern based ones
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          # None means "unknown" because the primary node failed to answer
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          # combines node reachability, live state and configured state
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          # the un-indexed nic fields all refer to the first NIC, if any
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          # only bridged NICs report their link as the bridge
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          # third character of the field name ('a'/'b') gives disk index 0/1
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          # groups are (kind, parameter[, index]), e.g. ("nic", "mac", "0")
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                # an index past the last NIC yields None, not an error
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output
4918
4919
class LUFailoverInstance(LogicalUnit):
  """Fail an instance over to its secondary node.

  The instance is shut down on its current primary node and, if it is
  marked as up, started again on its first secondary node, which is
  recorded as the new primary node.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Compute the shutdown timeout to use.

    The opcode's C{shutdown_timeout} attribute is optional; when it is
    missing, L{constants.DEFAULT_SHUTDOWN_TIMEOUT} is used instead.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    default_timeout)

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declaration.

    """
    self._ExpandAndLockInstance()
    # the node list starts empty, it is computed in DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Request the locks of the instance's nodes at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The pre-hooks node list holds the master and the secondary nodes of
    the instance; the post-hooks node list additionally contains the
    old primary node.

    """
    inst = self.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "OLD_SECONDARY": new_primary,
      "NEW_PRIMARY": new_primary,
      "NEW_SECONDARY": old_primary,
      }
    # the generic instance variables take precedence on name clashes
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    pre_nodes = [self.cfg.GetMasterNode()] + list(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]
    return env, pre_nodes, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This verifies that the instance uses a network mirrored disk
    template and that its first secondary node is online, not drained,
    has enough free memory (checked only when the instance is marked
    up) and provides the bridges the instance needs.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    be_params = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not network"
                                 " mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondaries = instance.secondary_nodes
    if not secondaries:
      raise errors.ProgrammerError("no secondary node but using a"
                                   " mirrored disk template")

    new_primary = secondaries[0]
    _CheckNodeOnline(self, new_primary)
    _CheckNodeNotDrained(self, new_primary)

    if not instance.admin_up:
      self.LogInfo("Not checking memory on the secondary node as instance"
                   " will not be started")
    else:
      # the instance will be started there, so it must fit in memory
      _CheckNodeFreeMemory(self, new_primary,
                           "failing over instance %s" % instance.name,
                           be_params[constants.BE_MEMORY],
                           instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=new_primary)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The instance is shut down on its present node, its disks are
    deactivated, the configuration is switched to the new primary node
    and, if the instance is marked up, it is started on that node.

    @param feedback_fn: function used to report progress to the caller

    """
    inst = self.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]
    ignore_consistency = self.op.ignore_consistency

    if not inst.admin_up:
      feedback_fn("* not checking disk consistency as instance is not running")
    else:
      feedback_fn("* checking disk consistency between source and target")
      for disk in inst.disks:
        # for drbd, these are drbd over lvm
        consistent = _CheckDiskConsistency(self, disk, new_primary, False)
        if not consistent and not ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % disk.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 inst.name, old_primary)

    shutdown_res = self.rpc.call_instance_shutdown(old_primary, inst,
                                                   self.shutdown_timeout)
    fail_msg = shutdown_res.fail_msg
    if fail_msg:
      if not ignore_consistency:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, old_primary, fail_msg))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           inst.name, old_primary, old_primary, fail_msg)

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, inst, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    inst.primary_node = new_primary
    # distribute new instance config to the other nodes
    self.cfg.Update(inst, feedback_fn)

    # Only start the instance if it's marked as up
    if not inst.admin_up:
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s",
                 inst.name, new_primary)

    disks_ok, _ = _AssembleInstanceDisks(self, inst,
                                         ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    start_res = self.rpc.call_instance_start(new_primary, inst, None, None)
    fail_msg = start_res.fail_msg
    if fail_msg:
      # do not leave the disks active when the instance is not running
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (inst.name, new_primary, fail_msg))
5072
5073
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  Unlike the failover, which shuts the instance down, the migration is
  performed without shutting the instance down. The actual work is
  delegated to a L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and create the migration tasklet.

    """
    self._ExpandAndLockInstance()

    # the node list starts empty, it is computed in DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 self.op.live, self.op.cleanup)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Request the locks of the instance's nodes at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The pre-hooks node list holds the master and the secondary nodes of
    the instance; the post-hooks node list additionally contains the
    old primary node.

    """
    inst = self._migrater.instance
    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    env = _BuildInstanceHookEnvByObject(self, inst)
    # the migration-specific variables override the generic ones
    env.update({
        "MIGRATE_LIVE": self.op.live,
        "MIGRATE_CLEANUP": self.op.cleanup,
        "OLD_PRIMARY": old_primary,
        "OLD_SECONDARY": new_primary,
        "NEW_PRIMARY": new_primary,
        "NEW_SECONDARY": old_primary,
        })

    pre_nodes = [self.cfg.GetMasterNode()] + list(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]
    return env, pre_nodes, post_nodes
5123
5124
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its primary node, its disks are created
  on the target node and filled with a copy of the data, the
  configuration is switched to the target node, the old disks are
  removed and, if the instance is marked as up, it is started on the
  target node.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Both C{shutdown_timeout} and C{ignore_consistency} are optional
    opcode attributes (they are not listed in C{_OP_REQP}), so defaults
    are computed here instead of reading them directly from the opcode
    later on.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
    # Exec consults this flag when the shutdown fails; without a default
    # an opcode lacking the attribute would raise AttributeError there
    self.ignore_consistency = getattr(self.op, "ignore_consistency", False)

  def ExpandNames(self):
    """Lock the instance and the (expanded) target node.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    # the instance's primary node is appended in DeclareLocks
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the instance's primary node to the node locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary node of the instance and the
    target node, both for the pre and the post phase.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    # the generic instance variables take precedence on name clashes
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that the target
    node differs from the current primary node, that all disks are
    plain logical volumes or files, and that the target node is online,
    not drained, has enough free memory (checked only when the instance
    is marked up) and provides the bridges the instance needs.

    @raise errors.OpPrereqError: if the instance already lives on the
        target node or one of its disks has a complex layout

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # only disks which can be copied as a single device are supported
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @param feedback_fn: function used to report progress to the caller
    @raise errors.OpExecError: if the shutdown fails (and consistency is
        not ignored), if the disks cannot be created or copied, or if
        the instance cannot be started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # use the value computed in CheckArguments, as the opcode is not
      # required to carry the ignore_consistency attribute
      if self.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # always give back the reserved minors and keep signalling the
        # failure to the caller
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      # the assemble call returns the device path to export to
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # the copy error is reported even if the cleanup itself failed
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        # do not leave the disks active when the instance is not running
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
5305
5306
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  One L{TLMigrateInstance} tasklet is created for every instance which
  has the given node as its primary node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the node and its primary instances, create the tasklets.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.needed_locks = {
      locking.LEVEL_NODE: [node_name],
      }

    # the nodes of the locked instances are appended in DeclareLocks
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    instance_names = []
    migraters = []

    for instance in _GetNodePrimaryInstances(self.cfg, node_name):
      logging.debug("Migrating instance %s", instance.name)
      instance_names.append(instance.name)
      # cleanup mode is never used for whole-node migrations
      migraters.append(TLMigrateInstance(self, instance.name,
                                         self.op.live, False))

    self.tasklets = migraters

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_names

  def DeclareLocks(self, level):
    """Request the locks of the instances' nodes at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The node list, used for both the pre and the post phase, contains
    only the master node.

    """
    master_only = [self.cfg.GetMasterNode()]
    env = {"NODE_NAME": self.op.node_name}

    return (env, master_only, master_only)
5357
5358
class TLMigrateInstance(Tasklet):
  """Tasklet migrating a single DRBD8 instance to its secondary node.

  Depending on the C{cleanup} flag, L{Exec} either performs the
  migration itself or tries to clean up after a previously failed one.

  @ivar instance_name: name of the instance, as passed by the caller
  @ivar live: value passed to the instance migration RPC call
  @ivar cleanup: whether to run the cleanup instead of the migration
  @ivar instance: the instance object, set by L{CheckPrereq}

  """
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    @param lu: the logical unit on whose behalf the tasklet runs
    @param instance_name: the name of the instance to migrate
    @param live: passed unchanged to the instance migration RPC call
    @param cleanup: if true, L{Exec} recovers from a failed migration
        instead of starting a new one

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that it uses the
    drbd8 disk template and has a secondary node, and that this node
    has enough free memory and the needed bridges. Unless in cleanup
    mode, it also checks that the target node is not drained and asks
    the primary node whether the instance is migratable.

    @raise errors.OpPrereqError: if the disk template is not drbd8
    @raise errors.ConfigurationError: if the instance has no secondary
        node

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    # the first secondary node is the migration target
    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    # these checks only apply when an actual migration will be done
    if not self.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call: all nodes are queried
    repeatedly, with a two seconds pause between the rounds, until each
    of them reports its disks as done. While waiting, the lowest
    percentage reported by the nodes is sent to the feedback function.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        # each node returns a (done, percent) pair
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    @param node: the node on which the instance's disks are closed

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    # set the disk IDs for the given node before issuing the RPC call
    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    The disconnect is requested on all nodes; a failure on any of them
    raises an error.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    @param multimaster: whether the disks should be attached in
        dual-master (true) or single-master (false) mode

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    @raise errors.OpExecError: if the instance is found running on
        both nodes or on none of them

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    # the node on which the instance does not run gets demoted
    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    The target node is demoted and the disks are reconnected in
    single-master mode. Failures are only logged as warnings, since
    this already runs as part of the error handling.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    This uses the finalize migration RPC call on the target node, with
    the success flag set to false. A failure is logged but not raised.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    If preparing the target node or the migration itself fails, the
    migration is aborted and the disk status reverted before the error
    is raised.

    @raise errors.OpExecError: if a disk is degraded on the target node
        or if any of the migration steps fails

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    # also stored on the tasklet, as _AbortMigration reads it from there
    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    # NOTE(review): fixed ten seconds pause before the migration; the
    # reason is not visible in this code -- confirm it is still needed
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    # NOTE(review): second fixed pause, after the migration; reason not
    # visible in this code either -- confirm
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # tell the target node that the migration succeeded
    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    # demote the old primary and go back to single-master mode
    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    This computes the source and target nodes and their secondary IP
    addresses, then runs either the cleanup or the migration depending
    on the C{cleanup} flag.

    @param feedback_fn: function used to report progress to the caller

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    # saved on the tasklet, as all the helper methods report through it
    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    # node name to secondary IP address mapping, used by the DRBD RPC calls
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()
5688
5689
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a device and all of its children on a given node.

  The children are always visited first. The device itself is only
  created if creation is forced, either by the caller or because the
  device reports that it has to be created on secondaries; in the
  latter case the creation is forced for all its children too.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      is switched to True as soon as a device whose
      CreateOnSecondary() method returns true is found
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  if device.CreateOnSecondary():
    must_create = True
  else:
    must_create = force_create

  # children first, as the device itself is built on top of them
  for sub_device in (device.children or []):
    _CreateBlockDev(lu, node, instance, sub_device, must_create,
                    info, force_open)

  if must_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5730
5731
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create one block device on a given node, without recursing.

  The children of the device are not touched, so they must have been
  created in advance. If the device has no physical id yet, the
  payload of the creation RPC is stored as its physical id.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  lu.cfg.SetDiskID(device, node)

  create_args = (node, device, device.size, instance.name, force_open, info)
  outcome = lu.rpc.call_blockdev_create(*create_args)

  failure_text = ("Can't create block device %s on"
                  " node %s for instance %s" % (device, node, instance.name))
  outcome.Raise(failure_text)

  if device.physical_id is not None:
    return
  device.physical_id = outcome.payload
5760
5761
def _GenerateUniqueNames(lu, exts):
  """Generate one unique name per requested suffix.

  For every entry of C{exts} a new unique ID is requested from the
  configuration (within the current execution context) and the entry
  is appended to it.

  @param lu: the lu on whose behalf we execute
  @param exts: the suffixes to append to the generated IDs
  @rtype: list
  @return: the generated names, in the order of C{exts}

  """
  return ["%s%s" % (lu.cfg.GenerateUniqueID(lu.proc.GetECId()), suffix)
          for suffix in exts]
5773
5774
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its two LV children.

  A port and a shared secret are requested from the configuration and
  both logical volumes are placed in the cluster volume group.

  @param lu: the lu on whose behalf we execute
  @param primary: the first node of the DRBD logical id
  @param secondary: the second node of the DRBD logical id
  @param size: the size of the data volume and of the DRBD device
  @param names: sequence holding the data LV name followed by the
      metadata LV name
  @param iv_name: the instance-visible name of the DRBD device
  @param p_minor: the minor stored for the first node
  @param s_minor: the minor stored for the second node
  @rtype: L{objects.Disk}
  @return: the DRBD8 disk object

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  # data volume first, then the fixed-size (128) metadata volume
  lv_children = []
  for position, lv_size in enumerate((size, 128)):
    lv_children.append(objects.Disk(dev_type=constants.LD_LV, size=lv_size,
                                    logical_id=(vgname, names[position])))

  drbd_id = (primary, secondary, port, p_minor, s_minor, shared_secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=lv_children,
                      iv_name=iv_name)
5794
5795
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  @param lu: the lu on whose behalf we execute
  @param template_name: the disk template to generate disks for
  @param instance_name: the name of the instance owning the disks
  @param primary_node: the primary node of the instance
  @param secondary_nodes: the secondary nodes; must be empty for the
      plain and file templates and hold exactly one node for drbd8
  @param disk_info: list of dicts with the "size" and "mode" of each
      disk
  @param file_storage_dir: directory of the disk files (file template)
  @param file_driver: driver of the disk files (file template)
  @param base_index: index of the first generated disk
  @rtype: list
  @return: the generated L{objects.Disk} objects (empty for diskless)
  @raise errors.ProgrammerError: for an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []

  if template_name == constants.DT_DISKLESS:
    return disks

  if template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    suffixes = [".disk%d" % (base_index + i) for i in range(disk_count)]
    names = _GenerateUniqueNames(lu, suffixes)
    for offset, spec in enumerate(disk_info):
      position = offset + base_index
      disks.append(objects.Disk(dev_type=constants.LD_LV, size=spec["size"],
                                logical_id=(vgname, names[offset]),
                                iv_name="disk/%d" % position,
                                mode=spec["mode"]))
    return disks

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk: the first is passed on as the primary's
    # minor, the second as the secondary's
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    suffixes = [".disk%d" % (base_index + i) for i in range(disk_count)]
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, suffixes):
      names.extend([lv_prefix + "_data", lv_prefix + "_meta"])

    for offset, spec in enumerate(disk_info):
      position = offset + base_index
      first = offset * 2
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      spec["size"], names[first:first + 2],
                                      "disk/%d" % position,
                                      minors[first], minors[first + 1])
      drbd_dev.mode = spec["mode"]
      disks.append(drbd_dev)
    return disks

  if template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for offset, spec in enumerate(disk_info):
      position = offset + base_index
      disks.append(objects.Disk(dev_type=constants.LD_FILE, size=spec["size"],
                                iv_name="disk/%d" % position,
                                logical_id=(file_driver,
                                            "%s/disk%d" % (file_storage_dir,
                                                           position)),
                                mode=spec["mode"]))
    return disks

  raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5862
5863
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  @param instance: the instance owning the disks; only its name is used
  @rtype: string
  @return: the instance name prefixed with "originstname+"

  """
  info_format = "originstname+%s"
  return info_format % instance.name
5869
5870
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance. For the file disk
  template the storage directory is created first, on the primary (or
  target) node. Failures are reported through the C{Raise} method of
  the RPC results; nothing is returned.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @return: None

  """
  info = _GetInstanceInfoText(instance)
  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template == constants.DT_FILE:
    # the directory is derived from the path of the first disk
    first_path = instance.disks[0].logical_id[1]
    file_storage_dir = os.path.dirname(first_path)
    dir_result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
    dir_result.Raise("Failed to create directory '%s' on"
                     " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      # creation and opening are both forced on the primary node only
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
5914
5915
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For the file disk template the storage directory (derived from the
  path of the first disk) is removed as well, on the target node if
  given, otherwise on the primary node.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        # best effort: warn, remember the failure and keep going
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    # NOTE(review): this assumes the instance still has at least one
    # disk; with an empty disk list the lookup below fails - confirm
    # whether that can happen
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = target_node or instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the request was actually sent to, which is not
      # the primary node when target_node is given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
5963
5964
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group.

  @param disk_template: the disk template of the instance
  @param disks: iterable of dicts holding the "size" of each disk
  @return: the required size for the plain and drbd8 templates, None
      for the diskless and file templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  # both totals are computed up front, whatever the template is
  plain_total = sum(d["size"] for d in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_total = sum(d["size"] + 128 for d in disks)

  # Required free disk space as a function of disk and swap space
  size_by_template = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
  }

  try:
    return size_by_template[disk_template]
  except KeyError:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)
5983
5984
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify. The parameters are
  sent to all the given nodes in one call; the answers of nodes marked
  offline are ignored.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  replies = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                   hvname,
                                                   hvparams)
  for name in nodenames:
    reply = replies[name]
    if not reply.offline:
      reply.Raise("Hypervisor parameter validation failed on node %s" % name)
6010
6011
6012 class LUCreateInstance(LogicalUnit):
6013   """Create an instance.
6014
6015   """
6016   HPATH = "instance-add"
6017   HTYPE = constants.HTYPE_INSTANCE
6018   _OP_REQP = ["instance_name", "disks",
6019               "mode", "start",
6020               "wait_for_sync", "ip_check", "nics",
6021               "hvparams", "beparams"]
6022   REQ_BGL = False
6023
6024   def CheckArguments(self):
6025     """Check arguments.
6026
6027     """
6028     # set optional parameters to none if they don't exist
6029     for attr in ["pnode", "snode", "iallocator", "hypervisor",
6030                  "disk_template", "identify_defaults"]:
6031       if not hasattr(self.op, attr):
6032         setattr(self.op, attr, None)
6033
6034     # do not require name_check to ease forward/backward compatibility
6035     # for tools
6036     if not hasattr(self.op, "name_check"):
6037       self.op.name_check = True
6038     if not hasattr(self.op, "no_install"):
6039       self.op.no_install = False
6040     if self.op.no_install and self.op.start:
6041       self.LogInfo("No-installation mode selected, disabling startup")
6042       self.op.start = False
6043     # validate/normalize the instance name
6044     self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6045     if self.op.ip_check and not self.op.name_check:
6046       # TODO: make the ip check more flexible and not depend on the name check
6047       raise errors.OpPrereqError("Cannot do ip checks without a name check",
6048                                  errors.ECODE_INVAL)
6049     # check disk information: either all adopt, or no adopt
6050     has_adopt = has_no_adopt = False
6051     for disk in self.op.disks:
6052       if "adopt" in disk:
6053         has_adopt = True
6054       else:
6055         has_no_adopt = True
6056     if has_adopt and has_no_adopt:
6057       raise errors.OpPrereqError("Either all disks are adopted or none is",
6058                                  errors.ECODE_INVAL)
6059     if has_adopt:
6060       if self.op.disk_template != constants.DT_PLAIN:
6061         raise errors.OpPrereqError("Disk adoption is only supported for the"
6062                                    " 'plain' disk template",
6063                                    errors.ECODE_INVAL)
6064       if self.op.iallocator is not None:
6065         raise errors.OpPrereqError("Disk adoption not allowed with an"
6066                                    " iallocator script", errors.ECODE_INVAL)
6067       if self.op.mode == constants.INSTANCE_IMPORT:
6068         raise errors.OpPrereqError("Disk adoption not allowed for"
6069                                    " instance import", errors.ECODE_INVAL)
6070
6071     self.adopt_disks = has_adopt
6072
6073     # verify creation mode
6074     if self.op.mode not in (constants.INSTANCE_CREATE,
6075                             constants.INSTANCE_IMPORT):
6076       raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6077                                  self.op.mode, errors.ECODE_INVAL)
6078
6079     # instance name verification
6080     if self.op.name_check:
6081       self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6082       self.op.instance_name = self.hostname1.name
6083       # used in CheckPrereq for ip ping check
6084       self.check_ip = self.hostname1.ip
6085     else:
6086       self.check_ip = None
6087
6088     # file storage checks
6089     if (self.op.file_driver and
6090         not self.op.file_driver in constants.FILE_DRIVER):
6091       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6092                                  self.op.file_driver, errors.ECODE_INVAL)
6093
6094     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6095       raise errors.OpPrereqError("File storage directory path not absolute",
6096                                  errors.ECODE_INVAL)
6097
6098     ### Node/iallocator related checks
6099     if [self.op.iallocator, self.op.pnode].count(None) != 1:
6100       raise errors.OpPrereqError("One and only one of iallocator and primary"
6101                                  " node must be given",
6102                                  errors.ECODE_INVAL)
6103
6104     if self.op.mode == constants.INSTANCE_IMPORT:
6105       # On import force_variant must be True, because if we forced it at
6106       # initial install, our only chance when importing it back is that it
6107       # works again!
6108       self.op.force_variant = True
6109
6110       if self.op.no_install:
6111         self.LogInfo("No-installation mode has no effect during import")
6112
6113     else: # INSTANCE_CREATE
6114       if getattr(self.op, "os_type", None) is None:
6115         raise errors.OpPrereqError("No guest OS specified",
6116                                    errors.ECODE_INVAL)
6117       self.op.force_variant = getattr(self.op, "force_variant", False)
6118       if self.op.disk_template is None:
6119         raise errors.OpPrereqError("No disk template specified",
6120                                    errors.ECODE_INVAL)
6121
6122   def ExpandNames(self):
6123     """ExpandNames for CreateInstance.
6124
6125     Figure out the right locks for instance creation.
6126
6127     """
6128     self.needed_locks = {}
6129
6130     instance_name = self.op.instance_name
6131     # this is just a preventive check, but someone might still add this
6132     # instance in the meantime, and creation will fail at lock-add time
6133     if instance_name in self.cfg.GetInstanceList():
6134       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6135                                  instance_name, errors.ECODE_EXISTS)
6136
6137     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6138
6139     if self.op.iallocator:
6140       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6141     else:
6142       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6143       nodelist = [self.op.pnode]
6144       if self.op.snode is not None:
6145         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6146         nodelist.append(self.op.snode)
6147       self.needed_locks[locking.LEVEL_NODE] = nodelist
6148
6149     # in case of import lock the source node too
6150     if self.op.mode == constants.INSTANCE_IMPORT:
6151       src_node = getattr(self.op, "src_node", None)
6152       src_path = getattr(self.op, "src_path", None)
6153
6154       if src_path is None:
6155         self.op.src_path = src_path = self.op.instance_name
6156
6157       if src_node is None:
6158         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6159         self.op.src_node = None
6160         if os.path.isabs(src_path):
6161           raise errors.OpPrereqError("Importing an instance from an absolute"
6162                                      " path requires a source node option.",
6163                                      errors.ECODE_INVAL)
6164       else:
6165         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6166         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6167           self.needed_locks[locking.LEVEL_NODE].append(src_node)
6168         if not os.path.isabs(src_path):
6169           self.op.src_path = src_path = \
6170             utils.PathJoin(constants.EXPORT_DIR, src_path)
6171
6172   def _RunAllocator(self):
6173     """Run the allocator based on input opcode.
6174
6175     """
6176     nics = [n.ToDict() for n in self.nics]
6177     ial = IAllocator(self.cfg, self.rpc,
6178                      mode=constants.IALLOCATOR_MODE_ALLOC,
6179                      name=self.op.instance_name,
6180                      disk_template=self.op.disk_template,
6181                      tags=[],
6182                      os=self.op.os_type,
6183                      vcpus=self.be_full[constants.BE_VCPUS],
6184                      mem_size=self.be_full[constants.BE_MEMORY],
6185                      disks=self.disks,
6186                      nics=nics,
6187                      hypervisor=self.op.hypervisor,
6188                      )
6189
6190     ial.Run(self.op.iallocator)
6191
6192     if not ial.success:
6193       raise errors.OpPrereqError("Can't compute nodes using"
6194                                  " iallocator '%s': %s" %
6195                                  (self.op.iallocator, ial.info),
6196                                  errors.ECODE_NORES)
6197     if len(ial.result) != ial.required_nodes:
6198       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6199                                  " of nodes (%s), required %s" %
6200                                  (self.op.iallocator, len(ial.result),
6201                                   ial.required_nodes), errors.ECODE_FAULT)
6202     self.op.pnode = ial.result[0]
6203     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6204                  self.op.instance_name, self.op.iallocator,
6205                  utils.CommaJoin(ial.result))
6206     if ial.required_nodes == 2:
6207       self.op.snode = ial.result[1]
6208
6209   def BuildHooksEnv(self):
6210     """Build hooks env.
6211
6212     This runs on master, primary and secondary nodes of the instance.
6213
6214     """
6215     env = {
6216       "ADD_MODE": self.op.mode,
6217       }
6218     if self.op.mode == constants.INSTANCE_IMPORT:
6219       env["SRC_NODE"] = self.op.src_node
6220       env["SRC_PATH"] = self.op.src_path
6221       env["SRC_IMAGES"] = self.src_images
6222
6223     env.update(_BuildInstanceHookEnv(
6224       name=self.op.instance_name,
6225       primary_node=self.op.pnode,
6226       secondary_nodes=self.secondaries,
6227       status=self.op.start,
6228       os_type=self.op.os_type,
6229       memory=self.be_full[constants.BE_MEMORY],
6230       vcpus=self.be_full[constants.BE_VCPUS],
6231       nics=_NICListToTuple(self, self.nics),
6232       disk_template=self.op.disk_template,
6233       disks=[(d["size"], d["mode"]) for d in self.disks],
6234       bep=self.be_full,
6235       hvp=self.hv_full,
6236       hypervisor_name=self.op.hypervisor,
6237     ))
6238
6239     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6240           self.secondaries)
6241     return env, nl, nl
6242
6243   def _ReadExportInfo(self):
6244     """Reads the export information from disk.
6245
6246     It will override the opcode source node and path with the actual
6247     information, if these two were not specified before.
6248
6249     @return: the export information
6250
6251     """
6252     assert self.op.mode == constants.INSTANCE_IMPORT
6253
6254     src_node = self.op.src_node
6255     src_path = self.op.src_path
6256
6257     if src_node is None:
6258       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6259       exp_list = self.rpc.call_export_list(locked_nodes)
6260       found = False
6261       for node in exp_list:
6262         if exp_list[node].fail_msg:
6263           continue
6264         if src_path in exp_list[node].payload:
6265           found = True
6266           self.op.src_node = src_node = node
6267           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6268                                                        src_path)
6269           break
6270       if not found:
6271         raise errors.OpPrereqError("No export found for relative path %s" %
6272                                     src_path, errors.ECODE_INVAL)
6273
6274     _CheckNodeOnline(self, src_node)
6275     result = self.rpc.call_export_info(src_node, src_path)
6276     result.Raise("No export or invalid export found in dir %s" % src_path)
6277
6278     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6279     if not export_info.has_section(constants.INISECT_EXP):
6280       raise errors.ProgrammerError("Corrupted export config",
6281                                    errors.ECODE_ENVIRON)
6282
6283     ei_version = export_info.get(constants.INISECT_EXP, "version")
6284     if (int(ei_version) != constants.EXPORT_VERSION):
6285       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6286                                  (ei_version, constants.EXPORT_VERSION),
6287                                  errors.ECODE_ENVIRON)
6288     return export_info
6289
6290   def _ReadExportParams(self, einfo):
6291     """Use export parameters as defaults.
6292
6293     In case the opcode doesn't specify (as in override) some instance
6294     parameters, then try to use them from the export information, if
6295     that declares them.
6296
6297     """
6298     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6299
6300     if self.op.disk_template is None:
6301       if einfo.has_option(constants.INISECT_INS, "disk_template"):
6302         self.op.disk_template = einfo.get(constants.INISECT_INS,
6303                                           "disk_template")
6304       else:
6305         raise errors.OpPrereqError("No disk template specified and the export"
6306                                    " is missing the disk_template information",
6307                                    errors.ECODE_INVAL)
6308
6309     if not self.op.disks:
6310       if einfo.has_option(constants.INISECT_INS, "disk_count"):
6311         disks = []
6312         # TODO: import the disk iv_name too
6313         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6314           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6315           disks.append({"size": disk_sz})
6316         self.op.disks = disks
6317       else:
6318         raise errors.OpPrereqError("No disk info specified and the export"
6319                                    " is missing the disk information",
6320                                    errors.ECODE_INVAL)
6321
6322     if (not self.op.nics and
6323         einfo.has_option(constants.INISECT_INS, "nic_count")):
6324       nics = []
6325       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6326         ndict = {}
6327         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6328           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6329           ndict[name] = v
6330         nics.append(ndict)
6331       self.op.nics = nics
6332
6333     if (self.op.hypervisor is None and
6334         einfo.has_option(constants.INISECT_INS, "hypervisor")):
6335       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6336     if einfo.has_section(constants.INISECT_HYP):
6337       # use the export parameters but do not override the ones
6338       # specified by the user
6339       for name, value in einfo.items(constants.INISECT_HYP):
6340         if name not in self.op.hvparams:
6341           self.op.hvparams[name] = value
6342
6343     if einfo.has_section(constants.INISECT_BEP):
6344       # use the parameters, without overriding
6345       for name, value in einfo.items(constants.INISECT_BEP):
6346         if name not in self.op.beparams:
6347           self.op.beparams[name] = value
6348     else:
6349       # try to read the parameters old style, from the main section
6350       for name in constants.BES_PARAMETERS:
6351         if (name not in self.op.beparams and
6352             einfo.has_option(constants.INISECT_INS, name)):
6353           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6354
6355   def _RevertToDefaults(self, cluster):
6356     """Revert the instance parameters to the default values.
6357
6358     """
6359     # hvparams
6360     hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6361     for name in self.op.hvparams.keys():
6362       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6363         del self.op.hvparams[name]
6364     # beparams
6365     be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6366     for name in self.op.beparams.keys():
6367       if name in be_defs and be_defs[name] == self.op.beparams[name]:
6368         del self.op.beparams[name]
6369     # nic params
6370     nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6371     for nic in self.op.nics:
6372       for name in constants.NICS_PARAMETERS:
6373         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6374           del nic[name]
6375
6376   def CheckPrereq(self):
6377     """Check prerequisites.
6378
6379     """
6380     if self.op.mode == constants.INSTANCE_IMPORT:
6381       export_info = self._ReadExportInfo()
6382       self._ReadExportParams(export_info)
6383
6384     _CheckDiskTemplate(self.op.disk_template)
6385
6386     if (not self.cfg.GetVGName() and
6387         self.op.disk_template not in constants.DTS_NOT_LVM):
6388       raise errors.OpPrereqError("Cluster does not support lvm-based"
6389                                  " instances", errors.ECODE_STATE)
6390
6391     if self.op.hypervisor is None:
6392       self.op.hypervisor = self.cfg.GetHypervisorType()
6393
6394     cluster = self.cfg.GetClusterInfo()
6395     enabled_hvs = cluster.enabled_hypervisors
6396     if self.op.hypervisor not in enabled_hvs:
6397       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6398                                  " cluster (%s)" % (self.op.hypervisor,
6399                                   ",".join(enabled_hvs)),
6400                                  errors.ECODE_STATE)
6401
6402     # check hypervisor parameter syntax (locally)
6403     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6404     filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6405                                                         self.op.os_type),
6406                                   self.op.hvparams)
6407     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6408     hv_type.CheckParameterSyntax(filled_hvp)
6409     self.hv_full = filled_hvp
6410     # check that we don't specify global parameters on an instance
6411     _CheckGlobalHvParams(self.op.hvparams)
6412
6413     # fill and remember the beparams dict
6414     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6415     self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6416                                     self.op.beparams)
6417
6418     # now that hvp/bep are in final format, let's reset to defaults,
6419     # if told to do so
6420     if self.op.identify_defaults:
6421       self._RevertToDefaults(cluster)
6422
6423     # NIC buildup
6424     self.nics = []
6425     for idx, nic in enumerate(self.op.nics):
6426       nic_mode_req = nic.get("mode", None)
6427       nic_mode = nic_mode_req
6428       if nic_mode is None:
6429         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6430
6431       # in routed mode, for the first nic, the default ip is 'auto'
6432       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6433         default_ip_mode = constants.VALUE_AUTO
6434       else:
6435         default_ip_mode = constants.VALUE_NONE
6436
6437       # ip validity checks
6438       ip = nic.get("ip", default_ip_mode)
6439       if ip is None or ip.lower() == constants.VALUE_NONE:
6440         nic_ip = None
6441       elif ip.lower() == constants.VALUE_AUTO:
6442         if not self.op.name_check:
6443           raise errors.OpPrereqError("IP address set to auto but name checks"
6444                                      " have been skipped. Aborting.",
6445                                      errors.ECODE_INVAL)
6446         nic_ip = self.hostname1.ip
6447       else:
6448         if not utils.IsValidIP(ip):
6449           raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6450                                      " like a valid IP" % ip,
6451                                      errors.ECODE_INVAL)
6452         nic_ip = ip
6453
6454       # TODO: check the ip address for uniqueness
6455       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6456         raise errors.OpPrereqError("Routed nic mode requires an ip address",
6457                                    errors.ECODE_INVAL)
6458
6459       # MAC address verification
6460       mac = nic.get("mac", constants.VALUE_AUTO)
6461       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6462         mac = utils.NormalizeAndValidateMac(mac)
6463
6464         try:
6465           self.cfg.ReserveMAC(mac, self.proc.GetECId())
6466         except errors.ReservationError:
6467           raise errors.OpPrereqError("MAC address %s already in use"
6468                                      " in cluster" % mac,
6469                                      errors.ECODE_NOTUNIQUE)
6470
6471       # bridge verification
6472       bridge = nic.get("bridge", None)
6473       link = nic.get("link", None)
6474       if bridge and link:
6475         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6476                                    " at the same time", errors.ECODE_INVAL)
6477       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6478         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6479                                    errors.ECODE_INVAL)
6480       elif bridge:
6481         link = bridge
6482
6483       nicparams = {}
6484       if nic_mode_req:
6485         nicparams[constants.NIC_MODE] = nic_mode_req
6486       if link:
6487         nicparams[constants.NIC_LINK] = link
6488
6489       check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6490                                       nicparams)
6491       objects.NIC.CheckParameterSyntax(check_params)
6492       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6493
6494     # disk checks/pre-build
6495     self.disks = []
6496     for disk in self.op.disks:
6497       mode = disk.get("mode", constants.DISK_RDWR)
6498       if mode not in constants.DISK_ACCESS_SET:
6499         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6500                                    mode, errors.ECODE_INVAL)
6501       size = disk.get("size", None)
6502       if size is None:
6503         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6504       try:
6505         size = int(size)
6506       except (TypeError, ValueError):
6507         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6508                                    errors.ECODE_INVAL)
6509       new_disk = {"size": size, "mode": mode}
6510       if "adopt" in disk:
6511         new_disk["adopt"] = disk["adopt"]
6512       self.disks.append(new_disk)
6513
6514     if self.op.mode == constants.INSTANCE_IMPORT:
6515
6516       # Check that the new instance doesn't have less disks than the export
6517       instance_disks = len(self.disks)
6518       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6519       if instance_disks < export_disks:
6520         raise errors.OpPrereqError("Not enough disks to import."
6521                                    " (instance: %d, export: %d)" %
6522                                    (instance_disks, export_disks),
6523                                    errors.ECODE_INVAL)
6524
6525       disk_images = []
6526       for idx in range(export_disks):
6527         option = 'disk%d_dump' % idx
6528         if export_info.has_option(constants.INISECT_INS, option):
6529           # FIXME: are the old os-es, disk sizes, etc. useful?
6530           export_name = export_info.get(constants.INISECT_INS, option)
6531           image = utils.PathJoin(self.op.src_path, export_name)
6532           disk_images.append(image)
6533         else:
6534           disk_images.append(False)
6535
6536       self.src_images = disk_images
6537
6538       old_name = export_info.get(constants.INISECT_INS, 'name')
6539       try:
6540         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6541       except (TypeError, ValueError), err:
6542         raise errors.OpPrereqError("Invalid export file, nic_count is not"
6543                                    " an integer: %s" % str(err),
6544                                    errors.ECODE_STATE)
6545       if self.op.instance_name == old_name:
6546         for idx, nic in enumerate(self.nics):
6547           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6548             nic_mac_ini = 'nic%d_mac' % idx
6549             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6550
6551     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6552
6553     # ip ping checks (we use the same ip that was resolved in ExpandNames)
6554     if self.op.ip_check:
6555       if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6556         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6557                                    (self.check_ip, self.op.instance_name),
6558                                    errors.ECODE_NOTUNIQUE)
6559
6560     #### mac address generation
6561     # By generating here the mac address both the allocator and the hooks get
6562     # the real final mac address rather than the 'auto' or 'generate' value.
6563     # There is a race condition between the generation and the instance object
6564     # creation, which means that we know the mac is valid now, but we're not
6565     # sure it will be when we actually add the instance. If things go bad
6566     # adding the instance will abort because of a duplicate mac, and the
6567     # creation job will fail.
6568     for nic in self.nics:
6569       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6570         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6571
6572     #### allocator run
6573
6574     if self.op.iallocator is not None:
6575       self._RunAllocator()
6576
6577     #### node related checks
6578
6579     # check primary node
6580     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6581     assert self.pnode is not None, \
6582       "Cannot retrieve locked node %s" % self.op.pnode
6583     if pnode.offline:
6584       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6585                                  pnode.name, errors.ECODE_STATE)
6586     if pnode.drained:
6587       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6588                                  pnode.name, errors.ECODE_STATE)
6589
6590     self.secondaries = []
6591
6592     # mirror node verification
6593     if self.op.disk_template in constants.DTS_NET_MIRROR:
6594       if self.op.snode is None:
6595         raise errors.OpPrereqError("The networked disk templates need"
6596                                    " a mirror node", errors.ECODE_INVAL)
6597       if self.op.snode == pnode.name:
6598         raise errors.OpPrereqError("The secondary node cannot be the"
6599                                    " primary node.", errors.ECODE_INVAL)
6600       _CheckNodeOnline(self, self.op.snode)
6601       _CheckNodeNotDrained(self, self.op.snode)
6602       self.secondaries.append(self.op.snode)
6603
6604     nodenames = [pnode.name] + self.secondaries
6605
6606     req_size = _ComputeDiskSize(self.op.disk_template,
6607                                 self.disks)
6608
6609     # Check lv size requirements, if not adopting
6610     if req_size is not None and not self.adopt_disks:
6611       _CheckNodesFreeDisk(self, nodenames, req_size)
6612
6613     if self.adopt_disks: # instead, we must check the adoption data
6614       all_lvs = set([i["adopt"] for i in self.disks])
6615       if len(all_lvs) != len(self.disks):
6616         raise errors.OpPrereqError("Duplicate volume names given for adoption",
6617                                    errors.ECODE_INVAL)
6618       for lv_name in all_lvs:
6619         try:
6620           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6621         except errors.ReservationError:
6622           raise errors.OpPrereqError("LV named %s used by another instance" %
6623                                      lv_name, errors.ECODE_NOTUNIQUE)
6624
6625       node_lvs = self.rpc.call_lv_list([pnode.name],
6626                                        self.cfg.GetVGName())[pnode.name]
6627       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6628       node_lvs = node_lvs.payload
6629       delta = all_lvs.difference(node_lvs.keys())
6630       if delta:
6631         raise errors.OpPrereqError("Missing logical volume(s): %s" %
6632                                    utils.CommaJoin(delta),
6633                                    errors.ECODE_INVAL)
6634       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6635       if online_lvs:
6636         raise errors.OpPrereqError("Online logical volumes found, cannot"
6637                                    " adopt: %s" % utils.CommaJoin(online_lvs),
6638                                    errors.ECODE_STATE)
6639       # update the size of disk based on what is found
6640       for dsk in self.disks:
6641         dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6642
6643     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6644
6645     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6646
6647     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6648
6649     # memory check on primary node
6650     if self.op.start:
6651       _CheckNodeFreeMemory(self, self.pnode.name,
6652                            "creating instance %s" % self.op.instance_name,
6653                            self.be_full[constants.BE_MEMORY],
6654                            self.op.hypervisor)
6655
6656     self.dry_run_result = list(nodenames)
6657
6658   def Exec(self, feedback_fn):
6659     """Create and add the instance to the cluster.
6660
6661     """
6662     instance = self.op.instance_name
6663     pnode_name = self.pnode.name
6664
6665     ht_kind = self.op.hypervisor
6666     if ht_kind in constants.HTS_REQ_PORT:
6667       network_port = self.cfg.AllocatePort()
6668     else:
6669       network_port = None
6670
6671     if constants.ENABLE_FILE_STORAGE:
6672       # this is needed because os.path.join does not accept None arguments
6673       if self.op.file_storage_dir is None:
6674         string_file_storage_dir = ""
6675       else:
6676         string_file_storage_dir = self.op.file_storage_dir
6677
6678       # build the full file storage dir path
6679       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6680                                         string_file_storage_dir, instance)
6681     else:
6682       file_storage_dir = ""
6683
6684
6685     disks = _GenerateDiskTemplate(self,
6686                                   self.op.disk_template,
6687                                   instance, pnode_name,
6688                                   self.secondaries,
6689                                   self.disks,
6690                                   file_storage_dir,
6691                                   self.op.file_driver,
6692                                   0)
6693
6694     iobj = objects.Instance(name=instance, os=self.op.os_type,
6695                             primary_node=pnode_name,
6696                             nics=self.nics, disks=disks,
6697                             disk_template=self.op.disk_template,
6698                             admin_up=False,
6699                             network_port=network_port,
6700                             beparams=self.op.beparams,
6701                             hvparams=self.op.hvparams,
6702                             hypervisor=self.op.hypervisor,
6703                             )
6704
6705     if self.adopt_disks:
6706       # rename LVs to the newly-generated names; we need to construct
6707       # 'fake' LV disks with the old data, plus the new unique_id
6708       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6709       rename_to = []
6710       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
6711         rename_to.append(t_dsk.logical_id)
6712         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6713         self.cfg.SetDiskID(t_dsk, pnode_name)
6714       result = self.rpc.call_blockdev_rename(pnode_name,
6715                                              zip(tmp_disks, rename_to))
6716       result.Raise("Failed to rename adoped LVs")
6717     else:
6718       feedback_fn("* creating instance disks...")
6719       try:
6720         _CreateDisks(self, iobj)
6721       except errors.OpExecError:
6722         self.LogWarning("Device creation failed, reverting...")
6723         try:
6724           _RemoveDisks(self, iobj)
6725         finally:
6726           self.cfg.ReleaseDRBDMinors(instance)
6727           raise
6728
6729     feedback_fn("adding instance %s to cluster config" % instance)
6730
6731     self.cfg.AddInstance(iobj, self.proc.GetECId())
6732
6733     # Declare that we don't want to remove the instance lock anymore, as we've
6734     # added the instance to the config
6735     del self.remove_locks[locking.LEVEL_INSTANCE]
6736     # Unlock all the nodes
6737     if self.op.mode == constants.INSTANCE_IMPORT:
6738       nodes_keep = [self.op.src_node]
6739       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6740                        if node != self.op.src_node]
6741       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6742       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6743     else:
6744       self.context.glm.release(locking.LEVEL_NODE)
6745       del self.acquired_locks[locking.LEVEL_NODE]
6746
6747     if self.op.wait_for_sync:
6748       disk_abort = not _WaitForSync(self, iobj)
6749     elif iobj.disk_template in constants.DTS_NET_MIRROR:
6750       # make sure the disks are not degraded (still sync-ing is ok)
6751       time.sleep(15)
6752       feedback_fn("* checking mirrors status")
6753       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6754     else:
6755       disk_abort = False
6756
6757     if disk_abort:
6758       _RemoveDisks(self, iobj)
6759       self.cfg.RemoveInstance(iobj.name)
6760       # Make sure the instance lock gets removed
6761       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6762       raise errors.OpExecError("There are some degraded disks for"
6763                                " this instance")
6764
6765     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6766       if self.op.mode == constants.INSTANCE_CREATE:
6767         if not self.op.no_install:
6768           feedback_fn("* running the instance OS create scripts...")
6769           # FIXME: pass debug option from opcode to backend
6770           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6771                                                  self.op.debug_level)
6772           result.Raise("Could not add os for instance %s"
6773                        " on node %s" % (instance, pnode_name))
6774
6775       elif self.op.mode == constants.INSTANCE_IMPORT:
6776         feedback_fn("* running the instance OS import scripts...")
6777         src_node = self.op.src_node
6778         src_images = self.src_images
6779         cluster_name = self.cfg.GetClusterName()
6780         # FIXME: pass debug option from opcode to backend
6781         import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6782                                                          src_node, src_images,
6783                                                          cluster_name,
6784                                                          self.op.debug_level)
6785         msg = import_result.fail_msg
6786         if msg:
6787           self.LogWarning("Error while importing the disk images for instance"
6788                           " %s on node %s: %s" % (instance, pnode_name, msg))
6789       else:
6790         # also checked in the prereq part
6791         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6792                                      % self.op.mode)
6793
6794     if self.op.start:
6795       iobj.admin_up = True
6796       self.cfg.Update(iobj, feedback_fn)
6797       logging.info("Starting instance %s on node %s", instance, pnode_name)
6798       feedback_fn("* starting instance...")
6799       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6800       result.Raise("Could not start instance")
6801
6802     return list(iobj.all_nodes)
6803
6804
class LUConnectConsole(NoHooksLU):
  """Build the command used to reach an instance's console.

  The result of this LU is not a status but a command line: the caller
  has to run it on the master node in order to get connected to the
  console of the instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to C{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance is looked up in the configuration and its primary node
    is passed to C{_CheckNodeOnline}.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

  def Exec(self, feedback_fn):
    """Compute the command line for connecting to the console.

    @param feedback_fn: not used by this LU
    @return: the value returned by C{self.ssh.BuildCmd} for running the
        hypervisor's console command as root on the primary node
    @raise errors.OpExecError: if the instance is not in the list of
        instances reported by its primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    # ask the primary node which instances it currently knows about
    all_results = self.rpc.call_instance_list([pnode], [inst.hypervisor])
    running = all_results[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name not in running.payload:
      raise errors.OpExecError("Instance %s is not running." % inst.name)

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    hv_class = hypervisor.GetHypervisor(inst.hypervisor)
    cluster_info = self.cfg.GetClusterInfo()
    # the filled parameters are handed over separately, so the instance
    # object is not edited (and the defaults never get saved into it)
    filled_hv = cluster_info.FillHV(inst)
    filled_be = cluster_info.FillBE(inst)
    console_cmd = hv_class.GetShellCommandForConsole(inst, filled_hv,
                                                     filled_be)

    # wrap the console command into an ssh command line
    return self.ssh.BuildCmd(pnode, "root", console_cmd,
                             batch=True, tty=True)
6856
6857
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  The actual work is delegated to a single L{TLReplaceDisks} tasklet.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Fill in the optional opcode fields and validate their combination.

    """
    # optional opcode attributes and the value used when they are missing
    optional = (
      ("remote_node", None),
      ("iallocator", None),
      ("early_release", False),
      )
    for attr_name, default in optional:
      if not hasattr(self.op, attr_name):
        setattr(self.op, attr_name, default)

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create the replace-disks tasklet.

    """
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is None:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # the literal False is the tasklet's delay_iallocator argument
    self.replacer = TLReplaceDisks(self, self.op.instance_name,
                                   self.op.mode, self.op.iallocator,
                                   self.op.remote_node, self.op.disks,
                                   False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Declare the instance's node locks, unless all nodes are locked.

    """
    if level != locking.LEVEL_NODE:
      return
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      # every node is going to be locked anyway
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list contains the master, the primary node of the instance
    and, if one was given, the new secondary node; the same list is
    returned twice.

    """
    inst = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": inst.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    nl = [self.cfg.GetMasterNode(), inst.primary_node]
    new_secondary = self.op.remote_node
    if new_secondary is not None:
      nl.append(new_secondary)
    return env, nl, nl
6932
6933
class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  One L{TLReplaceDisks} tasklet (in change-secondary mode) is created
  for each instance returned by C{_GetNodeSecondaryInstances}.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Fill in the optional opcode fields and validate their combination.

    """
    # optional opcode attributes and the value used when they are missing
    optional = (
      ("remote_node", None),
      ("iallocator", None),
      ("early_release", False),
      )
    for attr_name, default in optional:
      if not hasattr(self.op, attr_name):
        setattr(self.op, attr_name, default)

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create one tasklet per instance.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is None:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # One disk replacement tasklet for every instance which has the
    # evacuated node as secondary
    inst_names = []
    replacers = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      inst_names.append(inst.name)
      # no disks are named explicitly; True is the tasklet's
      # delay_iallocator argument
      replacers.append(TLReplaceDisks(self, inst.name,
                                      constants.REPLACE_DISK_CHG,
                                      self.op.iallocator,
                                      self.op.remote_node, [],
                                      True, self.op.early_release))

    self.tasklets = replacers
    self.instance_names = inst_names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    """Declare the instances' node locks, unless all nodes are locked.

    """
    if level != locking.LEVEL_NODE:
      return
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      # every node is going to be locked anyway
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list contains the master and, if one was given, the new
    secondary node; the same list is returned twice.

    """
    env = {"NODE_NAME": self.op.node_name}
    nl = [self.cfg.GetMasterNode()]

    new_secondary = self.op.remote_node
    if new_secondary is not None:
      env["NEW_SECONDARY"] = new_secondary
      nl.append(new_secondary)

    return (env, nl, nl)
7021
7022
7023 class TLReplaceDisks(Tasklet):
7024   """Replaces disks for an instance.
7025
7026   Note: Locking is not within the scope of this class.
7027
7028   """
7029   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7030                disks, delay_iallocator, early_release):
7031     """Initializes this class.
7032
7033     """
7034     Tasklet.__init__(self, lu)
7035
7036     # Parameters
7037     self.instance_name = instance_name
7038     self.mode = mode
7039     self.iallocator_name = iallocator_name
7040     self.remote_node = remote_node
7041     self.disks = disks
7042     self.delay_iallocator = delay_iallocator
7043     self.early_release = early_release
7044
7045     # Runtime data
7046     self.instance = None
7047     self.new_node = None
7048     self.target_node = None
7049     self.other_node = None
7050     self.remote_node_info = None
7051     self.node_secondary_ip = None
7052
7053   @staticmethod
7054   def CheckArguments(mode, remote_node, iallocator):
7055     """Helper function for users of this class.
7056
7057     """
7058     # check for valid parameter combination
7059     if mode == constants.REPLACE_DISK_CHG:
7060       if remote_node is None and iallocator is None:
7061         raise errors.OpPrereqError("When changing the secondary either an"
7062                                    " iallocator script must be used or the"
7063                                    " new node given", errors.ECODE_INVAL)
7064
7065       if remote_node is not None and iallocator is not None:
7066         raise errors.OpPrereqError("Give either the iallocator or the new"
7067                                    " secondary, not both", errors.ECODE_INVAL)
7068
7069     elif remote_node is not None or iallocator is not None:
7070       # Not replacing the secondary
7071       raise errors.OpPrereqError("The iallocator and new node options can"
7072                                  " only be used when changing the"
7073                                  " secondary node", errors.ECODE_INVAL)
7074
7075   @staticmethod
7076   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7077     """Compute a new secondary node using an IAllocator.
7078
7079     """
7080     ial = IAllocator(lu.cfg, lu.rpc,
7081                      mode=constants.IALLOCATOR_MODE_RELOC,
7082                      name=instance_name,
7083                      relocate_from=relocate_from)
7084
7085     ial.Run(iallocator_name)
7086
7087     if not ial.success:
7088       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7089                                  " %s" % (iallocator_name, ial.info),
7090                                  errors.ECODE_NORES)
7091
7092     if len(ial.result) != ial.required_nodes:
7093       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7094                                  " of nodes (%s), required %s" %
7095                                  (iallocator_name,
7096                                   len(ial.result), ial.required_nodes),
7097                                  errors.ECODE_FAULT)
7098
7099     remote_node_name = ial.result[0]
7100
7101     lu.LogInfo("Selected new secondary for instance '%s': %s",
7102                instance_name, remote_node_name)
7103
7104     return remote_node_name
7105
7106   def _FindFaultyDisks(self, node_name):
7107     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7108                                     node_name, True)
7109
7110   def CheckPrereq(self):
7111     """Check prerequisites.
7112
7113     This checks that the instance is in the cluster.
7114
7115     """
7116     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7117     assert instance is not None, \
7118       "Cannot retrieve locked instance %s" % self.instance_name
7119
7120     if instance.disk_template != constants.DT_DRBD8:
7121       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7122                                  " instances", errors.ECODE_INVAL)
7123
7124     if len(instance.secondary_nodes) != 1:
7125       raise errors.OpPrereqError("The instance has a strange layout,"
7126                                  " expected one secondary but found %d" %
7127                                  len(instance.secondary_nodes),
7128                                  errors.ECODE_FAULT)
7129
7130     if not self.delay_iallocator:
7131       self._CheckPrereq2()
7132
7133   def _CheckPrereq2(self):
7134     """Check prerequisites, second part.
7135
7136     This function should always be part of CheckPrereq. It was separated and is
7137     now called from Exec because during node evacuation iallocator was only
7138     called with an unmodified cluster model, not taking planned changes into
7139     account.
7140
7141     """
7142     instance = self.instance
7143     secondary_node = instance.secondary_nodes[0]
7144
7145     if self.iallocator_name is None:
7146       remote_node = self.remote_node
7147     else:
7148       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7149                                        instance.name, instance.secondary_nodes)
7150
7151     if remote_node is not None:
7152       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7153       assert self.remote_node_info is not None, \
7154         "Cannot retrieve locked node %s" % remote_node
7155     else:
7156       self.remote_node_info = None
7157
7158     if remote_node == self.instance.primary_node:
7159       raise errors.OpPrereqError("The specified node is the primary node of"
7160                                  " the instance.", errors.ECODE_INVAL)
7161
7162     if remote_node == secondary_node:
7163       raise errors.OpPrereqError("The specified node is already the"
7164                                  " secondary node of the instance.",
7165                                  errors.ECODE_INVAL)
7166
7167     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7168                                     constants.REPLACE_DISK_CHG):
7169       raise errors.OpPrereqError("Cannot specify disks to be replaced",
7170                                  errors.ECODE_INVAL)
7171
7172     if self.mode == constants.REPLACE_DISK_AUTO:
7173       faulty_primary = self._FindFaultyDisks(instance.primary_node)
7174       faulty_secondary = self._FindFaultyDisks(secondary_node)
7175
7176       if faulty_primary and faulty_secondary:
7177         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7178                                    " one node and can not be repaired"
7179                                    " automatically" % self.instance_name,
7180                                    errors.ECODE_STATE)
7181
7182       if faulty_primary:
7183         self.disks = faulty_primary
7184         self.target_node = instance.primary_node
7185         self.other_node = secondary_node
7186         check_nodes = [self.target_node, self.other_node]
7187       elif faulty_secondary:
7188         self.disks = faulty_secondary
7189         self.target_node = secondary_node
7190         self.other_node = instance.primary_node
7191         check_nodes = [self.target_node, self.other_node]
7192       else:
7193         self.disks = []
7194         check_nodes = []
7195
7196     else:
7197       # Non-automatic modes
7198       if self.mode == constants.REPLACE_DISK_PRI:
7199         self.target_node = instance.primary_node
7200         self.other_node = secondary_node
7201         check_nodes = [self.target_node, self.other_node]
7202
7203       elif self.mode == constants.REPLACE_DISK_SEC:
7204         self.target_node = secondary_node
7205         self.other_node = instance.primary_node
7206         check_nodes = [self.target_node, self.other_node]
7207
7208       elif self.mode == constants.REPLACE_DISK_CHG:
7209         self.new_node = remote_node
7210         self.other_node = instance.primary_node
7211         self.target_node = secondary_node
7212         check_nodes = [self.new_node, self.other_node]
7213
7214         _CheckNodeNotDrained(self.lu, remote_node)
7215
7216         old_node_info = self.cfg.GetNodeInfo(secondary_node)
7217         assert old_node_info is not None
7218         if old_node_info.offline and not self.early_release:
7219           # doesn't make sense to delay the release
7220           self.early_release = True
7221           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7222                           " early-release mode", secondary_node)
7223
7224       else:
7225         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7226                                      self.mode)
7227
7228       # If not specified all disks should be replaced
7229       if not self.disks:
7230         self.disks = range(len(self.instance.disks))
7231
7232     for node in check_nodes:
7233       _CheckNodeOnline(self.lu, node)
7234
7235     # Check whether disks are valid
7236     for disk_idx in self.disks:
7237       instance.FindDisk(disk_idx)
7238
7239     # Get secondary node IP addresses
7240     node_2nd_ip = {}
7241
7242     for node_name in [self.target_node, self.other_node, self.new_node]:
7243       if node_name is not None:
7244         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7245
7246     self.node_secondary_ip = node_2nd_ip
7247
7248   def Exec(self, feedback_fn):
7249     """Execute disk replacement.
7250
7251     This dispatches the disk replacement to the appropriate handler.
7252
7253     """
7254     if self.delay_iallocator:
7255       self._CheckPrereq2()
7256
7257     if not self.disks:
7258       feedback_fn("No disks need replacement")
7259       return
7260
7261     feedback_fn("Replacing disk(s) %s for %s" %
7262                 (utils.CommaJoin(self.disks), self.instance.name))
7263
7264     activate_disks = (not self.instance.admin_up)
7265
7266     # Activate the instance disks if we're replacing them on a down instance
7267     if activate_disks:
7268       _StartInstanceDisks(self.lu, self.instance, True)
7269
7270     try:
7271       # Should we replace the secondary node?
7272       if self.new_node is not None:
7273         fn = self._ExecDrbd8Secondary
7274       else:
7275         fn = self._ExecDrbd8DiskOnly
7276
7277       return fn(feedback_fn)
7278
7279     finally:
7280       # Deactivate the instance disks if we're replacing them on a
7281       # down instance
7282       if activate_disks:
7283         _SafeShutdownInstanceDisks(self.lu, self.instance)
7284
7285   def _CheckVolumeGroup(self, nodes):
7286     self.lu.LogInfo("Checking volume groups")
7287
7288     vgname = self.cfg.GetVGName()
7289
7290     # Make sure volume group exists on all involved nodes
7291     results = self.rpc.call_vg_list(nodes)
7292     if not results:
7293       raise errors.OpExecError("Can't list volume groups on the nodes")
7294
7295     for node in nodes:
7296       res = results[node]
7297       res.Raise("Error checking node %s" % node)
7298       if vgname not in res.payload:
7299         raise errors.OpExecError("Volume group '%s' not found on node %s" %
7300                                  (vgname, node))
7301
7302   def _CheckDisksExistence(self, nodes):
7303     # Check disk existence
7304     for idx, dev in enumerate(self.instance.disks):
7305       if idx not in self.disks:
7306         continue
7307
7308       for node in nodes:
7309         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7310         self.cfg.SetDiskID(dev, node)
7311
7312         result = self.rpc.call_blockdev_find(node, dev)
7313
7314         msg = result.fail_msg
7315         if msg or not result.payload:
7316           if not msg:
7317             msg = "disk not found"
7318           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7319                                    (idx, node, msg))
7320
7321   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7322     for idx, dev in enumerate(self.instance.disks):
7323       if idx not in self.disks:
7324         continue
7325
7326       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7327                       (idx, node_name))
7328
7329       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7330                                    ldisk=ldisk):
7331         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7332                                  " replace disks for instance %s" %
7333                                  (node_name, self.instance.name))
7334
7335   def _CreateNewStorage(self, node_name):
7336     vgname = self.cfg.GetVGName()
7337     iv_names = {}
7338
7339     for idx, dev in enumerate(self.instance.disks):
7340       if idx not in self.disks:
7341         continue
7342
7343       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7344
7345       self.cfg.SetDiskID(dev, node_name)
7346
7347       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7348       names = _GenerateUniqueNames(self.lu, lv_names)
7349
7350       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7351                              logical_id=(vgname, names[0]))
7352       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7353                              logical_id=(vgname, names[1]))
7354
7355       new_lvs = [lv_data, lv_meta]
7356       old_lvs = dev.children
7357       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7358
7359       # we pass force_create=True to force the LVM creation
7360       for new_lv in new_lvs:
7361         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7362                         _GetInstanceInfoText(self.instance), False)
7363
7364     return iv_names
7365
7366   def _CheckDevices(self, node_name, iv_names):
7367     for name, (dev, _, _) in iv_names.iteritems():
7368       self.cfg.SetDiskID(dev, node_name)
7369
7370       result = self.rpc.call_blockdev_find(node_name, dev)
7371
7372       msg = result.fail_msg
7373       if msg or not result.payload:
7374         if not msg:
7375           msg = "disk not found"
7376         raise errors.OpExecError("Can't find DRBD device %s: %s" %
7377                                  (name, msg))
7378
7379       if result.payload.is_degraded:
7380         raise errors.OpExecError("DRBD device %s is degraded!" % name)
7381
7382   def _RemoveOldStorage(self, node_name, iv_names):
7383     for name, (_, old_lvs, _) in iv_names.iteritems():
7384       self.lu.LogInfo("Remove logical volumes for %s" % name)
7385
7386       for lv in old_lvs:
7387         self.cfg.SetDiskID(lv, node_name)
7388
7389         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7390         if msg:
7391           self.lu.LogWarning("Can't remove old LV: %s" % msg,
7392                              hint="remove unused LVs manually")
7393
7394   def _ReleaseNodeLock(self, node_name):
7395     """Releases the lock for a given node."""
7396     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7397
7398   def _ExecDrbd8DiskOnly(self, feedback_fn):
7399     """Replace a disk on the primary or secondary for DRBD 8.
7400
7401     The algorithm for replace is quite complicated:
7402
7403       1. for each disk to be replaced:
7404
7405         1. create new LVs on the target node with unique names
7406         1. detach old LVs from the drbd device
7407         1. rename old LVs to name_replaced.<time_t>
7408         1. rename new LVs to old LVs
7409         1. attach the new LVs (with the old names now) to the drbd device
7410
7411       1. wait for sync across all devices
7412
7413       1. for each modified disk:
7414
7415         1. remove old LVs (which have the name name_replaces.<time_t>)
7416
7417     Failures are not very well handled.
7418
7419     """
7420     steps_total = 6
7421
7422     # Step: check device activation
7423     self.lu.LogStep(1, steps_total, "Check device existence")
7424     self._CheckDisksExistence([self.other_node, self.target_node])
7425     self._CheckVolumeGroup([self.target_node, self.other_node])
7426
7427     # Step: check other node consistency
7428     self.lu.LogStep(2, steps_total, "Check peer consistency")
7429     self._CheckDisksConsistency(self.other_node,
7430                                 self.other_node == self.instance.primary_node,
7431                                 False)
7432
7433     # Step: create new storage
7434     self.lu.LogStep(3, steps_total, "Allocate new storage")
7435     iv_names = self._CreateNewStorage(self.target_node)
7436
7437     # Step: for each lv, detach+rename*2+attach
7438     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7439     for dev, old_lvs, new_lvs in iv_names.itervalues():
7440       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7441
7442       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7443                                                      old_lvs)
7444       result.Raise("Can't detach drbd from local storage on node"
7445                    " %s for device %s" % (self.target_node, dev.iv_name))
7446       #dev.children = []
7447       #cfg.Update(instance)
7448
7449       # ok, we created the new LVs, so now we know we have the needed
7450       # storage; as such, we proceed on the target node to rename
7451       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7452       # using the assumption that logical_id == physical_id (which in
7453       # turn is the unique_id on that node)
7454
7455       # FIXME(iustin): use a better name for the replaced LVs
7456       temp_suffix = int(time.time())
7457       ren_fn = lambda d, suff: (d.physical_id[0],
7458                                 d.physical_id[1] + "_replaced-%s" % suff)
7459
7460       # Build the rename list based on what LVs exist on the node
7461       rename_old_to_new = []
7462       for to_ren in old_lvs:
7463         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7464         if not result.fail_msg and result.payload:
7465           # device exists
7466           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7467
7468       self.lu.LogInfo("Renaming the old LVs on the target node")
7469       result = self.rpc.call_blockdev_rename(self.target_node,
7470                                              rename_old_to_new)
7471       result.Raise("Can't rename old LVs on node %s" % self.target_node)
7472
7473       # Now we rename the new LVs to the old LVs
7474       self.lu.LogInfo("Renaming the new LVs on the target node")
7475       rename_new_to_old = [(new, old.physical_id)
7476                            for old, new in zip(old_lvs, new_lvs)]
7477       result = self.rpc.call_blockdev_rename(self.target_node,
7478                                              rename_new_to_old)
7479       result.Raise("Can't rename new LVs on node %s" % self.target_node)
7480
7481       for old, new in zip(old_lvs, new_lvs):
7482         new.logical_id = old.logical_id
7483         self.cfg.SetDiskID(new, self.target_node)
7484
7485       for disk in old_lvs:
7486         disk.logical_id = ren_fn(disk, temp_suffix)
7487         self.cfg.SetDiskID(disk, self.target_node)
7488
7489       # Now that the new lvs have the old name, we can add them to the device
7490       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7491       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7492                                                   new_lvs)
7493       msg = result.fail_msg
7494       if msg:
7495         for new_lv in new_lvs:
7496           msg2 = self.rpc.call_blockdev_remove(self.target_node,
7497                                                new_lv).fail_msg
7498           if msg2:
7499             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7500                                hint=("cleanup manually the unused logical"
7501                                      "volumes"))
7502         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7503
7504       dev.children = new_lvs
7505
7506       self.cfg.Update(self.instance, feedback_fn)
7507
7508     cstep = 5
7509     if self.early_release:
7510       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7511       cstep += 1
7512       self._RemoveOldStorage(self.target_node, iv_names)
7513       # WARNING: we release both node locks here, do not do other RPCs
7514       # than WaitForSync to the primary node
7515       self._ReleaseNodeLock([self.target_node, self.other_node])
7516
7517     # Wait for sync
7518     # This can fail as the old devices are degraded and _WaitForSync
7519     # does a combined result over all disks, so we don't check its return value
7520     self.lu.LogStep(cstep, steps_total, "Sync devices")
7521     cstep += 1
7522     _WaitForSync(self.lu, self.instance)
7523
7524     # Check all devices manually
7525     self._CheckDevices(self.instance.primary_node, iv_names)
7526
7527     # Step: remove old storage
7528     if not self.early_release:
7529       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7530       cstep += 1
7531       self._RemoveOldStorage(self.target_node, iv_names)
7532
7533   def _ExecDrbd8Secondary(self, feedback_fn):
7534     """Replace the secondary node for DRBD 8.
7535
7536     The algorithm for replace is quite complicated:
7537       - for all disks of the instance:
7538         - create new LVs on the new node with same names
7539         - shutdown the drbd device on the old secondary
7540         - disconnect the drbd network on the primary
7541         - create the drbd device on the new secondary
7542         - network attach the drbd on the primary, using an artifice:
7543           the drbd code for Attach() will connect to the network if it
7544           finds a device which is connected to the good local disks but
7545           not network enabled
7546       - wait for sync across all devices
7547       - remove all disks from the old secondary
7548
7549     Failures are not very well handled.
7550
7551     """
7552     steps_total = 6
7553
7554     # Step: check device activation
7555     self.lu.LogStep(1, steps_total, "Check device existence")
7556     self._CheckDisksExistence([self.instance.primary_node])
7557     self._CheckVolumeGroup([self.instance.primary_node])
7558
7559     # Step: check other node consistency
7560     self.lu.LogStep(2, steps_total, "Check peer consistency")
7561     self._CheckDisksConsistency(self.instance.primary_node, True, True)
7562
7563     # Step: create new storage
7564     self.lu.LogStep(3, steps_total, "Allocate new storage")
7565     for idx, dev in enumerate(self.instance.disks):
7566       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7567                       (self.new_node, idx))
7568       # we pass force_create=True to force LVM creation
7569       for new_lv in dev.children:
7570         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7571                         _GetInstanceInfoText(self.instance), False)
7572
7573     # Step 4: dbrd minors and drbd setups changes
7574     # after this, we must manually remove the drbd minors on both the
7575     # error and the success paths
7576     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7577     minors = self.cfg.AllocateDRBDMinor([self.new_node
7578                                          for dev in self.instance.disks],
7579                                         self.instance.name)
7580     logging.debug("Allocated minors %r", minors)
7581
7582     iv_names = {}
7583     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7584       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7585                       (self.new_node, idx))
7586       # create new devices on new_node; note that we create two IDs:
7587       # one without port, so the drbd will be activated without
7588       # networking information on the new node at this stage, and one
7589       # with network, for the latter activation in step 4
7590       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7591       if self.instance.primary_node == o_node1:
7592         p_minor = o_minor1
7593       else:
7594         assert self.instance.primary_node == o_node2, "Three-node instance?"
7595         p_minor = o_minor2
7596
7597       new_alone_id = (self.instance.primary_node, self.new_node, None,
7598                       p_minor, new_minor, o_secret)
7599       new_net_id = (self.instance.primary_node, self.new_node, o_port,
7600                     p_minor, new_minor, o_secret)
7601
7602       iv_names[idx] = (dev, dev.children, new_net_id)
7603       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7604                     new_net_id)
7605       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7606                               logical_id=new_alone_id,
7607                               children=dev.children,
7608                               size=dev.size)
7609       try:
7610         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7611                               _GetInstanceInfoText(self.instance), False)
7612       except errors.GenericError:
7613         self.cfg.ReleaseDRBDMinors(self.instance.name)
7614         raise
7615
7616     # We have new devices, shutdown the drbd on the old secondary
7617     for idx, dev in enumerate(self.instance.disks):
7618       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7619       self.cfg.SetDiskID(dev, self.target_node)
7620       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7621       if msg:
7622         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7623                            "node: %s" % (idx, msg),
7624                            hint=("Please cleanup this device manually as"
7625                                  " soon as possible"))
7626
7627     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7628     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7629                                                self.node_secondary_ip,
7630                                                self.instance.disks)\
7631                                               [self.instance.primary_node]
7632
7633     msg = result.fail_msg
7634     if msg:
7635       # detaches didn't succeed (unlikely)
7636       self.cfg.ReleaseDRBDMinors(self.instance.name)
7637       raise errors.OpExecError("Can't detach the disks from the network on"
7638                                " old node: %s" % (msg,))
7639
7640     # if we managed to detach at least one, we update all the disks of
7641     # the instance to point to the new secondary
7642     self.lu.LogInfo("Updating instance configuration")
7643     for dev, _, new_logical_id in iv_names.itervalues():
7644       dev.logical_id = new_logical_id
7645       self.cfg.SetDiskID(dev, self.instance.primary_node)
7646
7647     self.cfg.Update(self.instance, feedback_fn)
7648
7649     # and now perform the drbd attach
7650     self.lu.LogInfo("Attaching primary drbds to new secondary"
7651                     " (standalone => connected)")
7652     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7653                                             self.new_node],
7654                                            self.node_secondary_ip,
7655                                            self.instance.disks,
7656                                            self.instance.name,
7657                                            False)
7658     for to_node, to_result in result.items():
7659       msg = to_result.fail_msg
7660       if msg:
7661         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7662                            to_node, msg,
7663                            hint=("please do a gnt-instance info to see the"
7664                                  " status of disks"))
7665     cstep = 5
7666     if self.early_release:
7667       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7668       cstep += 1
7669       self._RemoveOldStorage(self.target_node, iv_names)
7670       # WARNING: we release all node locks here, do not do other RPCs
7671       # than WaitForSync to the primary node
7672       self._ReleaseNodeLock([self.instance.primary_node,
7673                              self.target_node,
7674                              self.new_node])
7675
7676     # Wait for sync
7677     # This can fail as the old devices are degraded and _WaitForSync
7678     # does a combined result over all disks, so we don't check its return value
7679     self.lu.LogStep(cstep, steps_total, "Sync devices")
7680     cstep += 1
7681     _WaitForSync(self.lu, self.instance)
7682
7683     # Check all devices manually
7684     self._CheckDevices(self.instance.primary_node, iv_names)
7685
7686     # Step: remove old storage
7687     if not self.early_release:
7688       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7689       self._RemoveOldStorage(self.target_node, iv_names)
7690
7691
7692 class LURepairNodeStorage(NoHooksLU):
7693   """Repairs the volume group on a node.
7694
7695   """
7696   _OP_REQP = ["node_name"]
7697   REQ_BGL = False
7698
7699   def CheckArguments(self):
7700     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7701
7702     _CheckStorageType(self.op.storage_type)
7703
7704   def ExpandNames(self):
7705     self.needed_locks = {
7706       locking.LEVEL_NODE: [self.op.node_name],
7707       }
7708
7709   def _CheckFaultyDisks(self, instance, node_name):
7710     """Ensure faulty disks abort the opcode or at least warn."""
7711     try:
7712       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7713                                   node_name, True):
7714         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7715                                    " node '%s'" % (instance.name, node_name),
7716                                    errors.ECODE_STATE)
7717     except errors.OpPrereqError, err:
7718       if self.op.ignore_consistency:
7719         self.proc.LogWarning(str(err.args[0]))
7720       else:
7721         raise
7722
7723   def CheckPrereq(self):
7724     """Check prerequisites.
7725
7726     """
7727     storage_type = self.op.storage_type
7728
7729     if (constants.SO_FIX_CONSISTENCY not in
7730         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7731       raise errors.OpPrereqError("Storage units of type '%s' can not be"
7732                                  " repaired" % storage_type,
7733                                  errors.ECODE_INVAL)
7734
7735     # Check whether any instance on this node has faulty disks
7736     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7737       if not inst.admin_up:
7738         continue
7739       check_nodes = set(inst.all_nodes)
7740       check_nodes.discard(self.op.node_name)
7741       for inst_node_name in check_nodes:
7742         self._CheckFaultyDisks(inst, inst_node_name)
7743
7744   def Exec(self, feedback_fn):
7745     feedback_fn("Repairing storage unit '%s' on %s ..." %
7746                 (self.op.name, self.op.node_name))
7747
7748     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7749     result = self.rpc.call_storage_execute(self.op.node_name,
7750                                            self.op.storage_type, st_args,
7751                                            self.op.name,
7752                                            constants.SO_FIX_CONSISTENCY)
7753     result.Raise("Failed to repair storage unit '%s' on %s" %
7754                  (self.op.name, self.op.node_name))
7755
7756
class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  The strategy is either "move everything to the given remote node" or
  whatever the given iallocator computes.

  """
  _OP_REQP = ["nodes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional parameters and reject giving both of them.

    """
    for optional in ("remote_node", "iallocator"):
      if not hasattr(self.op, optional):
        setattr(self.op, optional, None)

    both_given = (self.op.remote_node is not None and
                  self.op.iallocator is not None)
    if both_given:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    """Expand the node names and compute the needed node locks.

    Without a remote node all nodes are locked, otherwise only the evacuated
    nodes and the remote node.

    """
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = {}
    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      node_locks = self.op.nodes + [self.op.remote_node]
    else:
      node_locks = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def CheckPrereq(self):
    """No prerequisites are checked for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the evacuation strategy.

    @param feedback_fn: unused here
    @return: with a remote node, a list of [instance name, remote node]
        pairs; otherwise the result of the iallocator run

    """
    new_secondary = self.op.remote_node

    if new_secondary is None:
      # let the iallocator compute the solution
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      return ial.result

    # gather the secondary instances of all evacuated nodes first
    secondary_instances = []
    for node in self.op.nodes:
      secondary_instances.extend(_GetNodeSecondaryInstances(self.cfg, node))

    strategy = []
    for inst in secondary_instances:
      if inst.primary_node == new_secondary:
        raise errors.OpPrereqError("Node %s is the primary node of"
                                   " instance %s, cannot use it as"
                                   " secondary" %
                                   (new_secondary, inst.name),
                                   errors.ECODE_INVAL)
      strategy.append([inst.name, new_secondary])
    return strategy
7809
7810
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the locks of the instance's nodes at node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    @return: tuple of (environment dict, node list, node list); the same
        node list is used in both positions

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes += list(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that all its nodes are
    online, that its disk template can be grown and that the disk exists.
    Except for file-based instances the free disk space on the nodes is
    checked as well.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node_names = list(inst.all_nodes)
    for node_name in node_names:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    template = inst.disk_template
    if template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    if template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDisk(self, node_names, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    The disk is grown on every node of the instance, then the new size is
    recorded in the configuration; optionally the sync is waited for.

    @param feedback_fn: feedback function, passed on to the configuration
        update

    """
    inst = self.instance
    grown_disk = self.disk
    amount = self.op.amount

    for node_name in inst.all_nodes:
      self.cfg.SetDiskID(grown_disk, node_name)
      grow_result = self.rpc.call_blockdev_grow(node_name, grown_disk, amount)
      grow_result.Raise("Grow request failed to node %s" % node_name)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    grown_disk.RecordGrow(amount)
    self.cfg.Update(inst, feedback_fn)

    # a bad sync status is only reported, it does not fail the operation
    if self.op.wait_for_sync and not _WaitForSync(self, inst):
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")
7895
7896
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  For every requested instance (or for all locked instances when the
  request list is empty) a dictionary describing the instance is
  returned; unless the opcode requests static data only, the nodes are
  queried for the run state and the block device status as well.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the requested instance names and declare the locks.

    @raise errors.OpPrereqError: if the 'instances' opcode field is not
        a list

    """
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    requested = self.op.instances
    if not isinstance(requested, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if requested:
      self.wanted_names = [_ExpandInstanceName(self.cfg, name)
                           for name in requested]
      instance_locks = self.wanted_names
    else:
      # no explicit list; CheckPrereq fills in the names from the
      # instance locks that were actually acquired
      self.wanted_names = None
      instance_locks = locking.ALL_SET

    self.needed_locks[locking.LEVEL_INSTANCE] = instance_locks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves the final list of instance names (taken from the acquired
    instance locks if none were given) and loads the matching
    configuration objects into self.wanted_instances.

    """
    names = self.wanted_names
    if names is None:
      names = self.acquired_locks[locking.LEVEL_INSTANCE]
      self.wanted_names = names

    get_info = self.cfg.GetInstanceInfo
    self.wanted_instances = [get_info(name) for name in names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Return the status of a block device on one node.

    @param node: name of the node to query; a false value means there is
        nothing to query
    @param instance_name: instance name, only used in the error message
    @param dev: the disk object to look up
    @return: None for static queries, for a missing or offline node and
        for an empty payload; otherwise a tuple of (dev_path, major,
        minor, sync_percent, estimated_time, is_degraded, ldisk_status)

    """
    if self.op.static:
      return None
    if not node:
      return None

    self.cfg.SetDiskID(dev, node)

    find_result = self.rpc.call_blockdev_find(node, dev)
    if find_result.offline:
      return None

    find_result.Raise("Can't compute disk status for %s" % instance_name)

    bdev = find_result.payload
    if bdev is None:
      return None

    return (bdev.dev_path, bdev.major, bdev.minor,
            bdev.sync_percent, bdev.estimated_time,
            bdev.is_degraded, bdev.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute the status of a disk and, recursively, of its children.

    @param instance: the instance owning the disk
    @param snode: secondary node to query; for DRBD devices it is
        replaced by the peer node taken from the device's logical_id
    @param dev: the disk object
    @return: dict describing the device, with its primary/secondary
        status and the list of its children's dicts

    """
    if dev.dev_type in constants.LDS_DRBD:
      # the secondary is whichever end of the DRBD pair is not the
      # instance's primary node
      if dev.logical_id[0] != instance.primary_node:
        snode = dev.logical_id[0]
      else:
        snode = dev.logical_id[1]

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    children_status = []
    if dev.children:
      for child in dev.children:
        children_status.append(self._ComputeDiskStatus(instance, snode,
                                                       child))

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": children_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data.

    @return: dict mapping each instance name to its data dictionary

    """
    cluster = self.cfg.GetClusterInfo()
    all_data = {}

    for instance in self.wanted_instances:
      # the run state stays None when only static data was requested
      run_state = None
      if not self.op.static:
        info_result = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        info_result.Raise("Error checking node %s" % instance.primary_node)
        payload = info_result.payload
        if payload and "state" in payload:
          run_state = "up"
        else:
          run_state = "down"

      config_state = "down"
      if instance.admin_up:
        config_state = "up"

      disk_data = []
      for device in instance.disks:
        disk_data.append(self._ComputeDiskStatus(instance, None, device))

      all_data[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": run_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disk_data,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return all_data
8052
8053
8054 class LUSetInstanceParams(LogicalUnit):
8055   """Modifies an instance's parameters.
8056
8057   """
8058   HPATH = "instance-modify"
8059   HTYPE = constants.HTYPE_INSTANCE
8060   _OP_REQP = ["instance_name"]
8061   REQ_BGL = False
8062
8063   def CheckArguments(self):
8064     if not hasattr(self.op, 'nics'):
8065       self.op.nics = []
8066     if not hasattr(self.op, 'disks'):
8067       self.op.disks = []
8068     if not hasattr(self.op, 'beparams'):
8069       self.op.beparams = {}
8070     if not hasattr(self.op, 'hvparams'):
8071       self.op.hvparams = {}
8072     if not hasattr(self.op, "disk_template"):
8073       self.op.disk_template = None
8074     if not hasattr(self.op, "remote_node"):
8075       self.op.remote_node = None
8076     if not hasattr(self.op, "os_name"):
8077       self.op.os_name = None
8078     if not hasattr(self.op, "force_variant"):
8079       self.op.force_variant = False
8080     self.op.force = getattr(self.op, "force", False)
8081     if not (self.op.nics or self.op.disks or self.op.disk_template or
8082             self.op.hvparams or self.op.beparams or self.op.os_name):
8083       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8084
8085     if self.op.hvparams:
8086       _CheckGlobalHvParams(self.op.hvparams)
8087
8088     # Disk validation
8089     disk_addremove = 0
8090     for disk_op, disk_dict in self.op.disks:
8091       if disk_op == constants.DDM_REMOVE:
8092         disk_addremove += 1
8093         continue
8094       elif disk_op == constants.DDM_ADD:
8095         disk_addremove += 1
8096       else:
8097         if not isinstance(disk_op, int):
8098           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8099         if not isinstance(disk_dict, dict):
8100           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8101           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8102
8103       if disk_op == constants.DDM_ADD:
8104         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8105         if mode not in constants.DISK_ACCESS_SET:
8106           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8107                                      errors.ECODE_INVAL)
8108         size = disk_dict.get('size', None)
8109         if size is None:
8110           raise errors.OpPrereqError("Required disk parameter size missing",
8111                                      errors.ECODE_INVAL)
8112         try:
8113           size = int(size)
8114         except (TypeError, ValueError), err:
8115           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8116                                      str(err), errors.ECODE_INVAL)
8117         disk_dict['size'] = size
8118       else:
8119         # modification of disk
8120         if 'size' in disk_dict:
8121           raise errors.OpPrereqError("Disk size change not possible, use"
8122                                      " grow-disk", errors.ECODE_INVAL)
8123
8124     if disk_addremove > 1:
8125       raise errors.OpPrereqError("Only one disk add or remove operation"
8126                                  " supported at a time", errors.ECODE_INVAL)
8127
8128     if self.op.disks and self.op.disk_template is not None:
8129       raise errors.OpPrereqError("Disk template conversion and other disk"
8130                                  " changes not supported at the same time",
8131                                  errors.ECODE_INVAL)
8132
8133     if self.op.disk_template:
8134       _CheckDiskTemplate(self.op.disk_template)
8135       if (self.op.disk_template in constants.DTS_NET_MIRROR and
8136           self.op.remote_node is None):
8137         raise errors.OpPrereqError("Changing the disk template to a mirrored"
8138                                    " one requires specifying a secondary node",
8139                                    errors.ECODE_INVAL)
8140
8141     # NIC validation
8142     nic_addremove = 0
8143     for nic_op, nic_dict in self.op.nics:
8144       if nic_op == constants.DDM_REMOVE:
8145         nic_addremove += 1
8146         continue
8147       elif nic_op == constants.DDM_ADD:
8148         nic_addremove += 1
8149       else:
8150         if not isinstance(nic_op, int):
8151           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8152         if not isinstance(nic_dict, dict):
8153           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8154           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8155
8156       # nic_dict should be a dict
8157       nic_ip = nic_dict.get('ip', None)
8158       if nic_ip is not None:
8159         if nic_ip.lower() == constants.VALUE_NONE:
8160           nic_dict['ip'] = None
8161         else:
8162           if not utils.IsValidIP(nic_ip):
8163             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8164                                        errors.ECODE_INVAL)
8165
8166       nic_bridge = nic_dict.get('bridge', None)
8167       nic_link = nic_dict.get('link', None)
8168       if nic_bridge and nic_link:
8169         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8170                                    " at the same time", errors.ECODE_INVAL)
8171       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8172         nic_dict['bridge'] = None
8173       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8174         nic_dict['link'] = None
8175
8176       if nic_op == constants.DDM_ADD:
8177         nic_mac = nic_dict.get('mac', None)
8178         if nic_mac is None:
8179           nic_dict['mac'] = constants.VALUE_AUTO
8180
8181       if 'mac' in nic_dict:
8182         nic_mac = nic_dict['mac']
8183         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8184           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8185
8186         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8187           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8188                                      " modifying an existing nic",
8189                                      errors.ECODE_INVAL)
8190
8191     if nic_addremove > 1:
8192       raise errors.OpPrereqError("Only one NIC add or remove operation"
8193                                  " supported at a time", errors.ECODE_INVAL)
8194
8195   def ExpandNames(self):
8196     self._ExpandAndLockInstance()
8197     self.needed_locks[locking.LEVEL_NODE] = []
8198     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8199
8200   def DeclareLocks(self, level):
8201     if level == locking.LEVEL_NODE:
8202       self._LockInstancesNodes()
8203       if self.op.disk_template and self.op.remote_node:
8204         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8205         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8206
8207   def BuildHooksEnv(self):
8208     """Build hooks env.
8209
8210     This runs on the master, primary and secondaries.
8211
8212     """
8213     args = dict()
8214     if constants.BE_MEMORY in self.be_new:
8215       args['memory'] = self.be_new[constants.BE_MEMORY]
8216     if constants.BE_VCPUS in self.be_new:
8217       args['vcpus'] = self.be_new[constants.BE_VCPUS]
8218     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8219     # information at all.
8220     if self.op.nics:
8221       args['nics'] = []
8222       nic_override = dict(self.op.nics)
8223       c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8224       for idx, nic in enumerate(self.instance.nics):
8225         if idx in nic_override:
8226           this_nic_override = nic_override[idx]
8227         else:
8228           this_nic_override = {}
8229         if 'ip' in this_nic_override:
8230           ip = this_nic_override['ip']
8231         else:
8232           ip = nic.ip
8233         if 'mac' in this_nic_override:
8234           mac = this_nic_override['mac']
8235         else:
8236           mac = nic.mac
8237         if idx in self.nic_pnew:
8238           nicparams = self.nic_pnew[idx]
8239         else:
8240           nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8241         mode = nicparams[constants.NIC_MODE]
8242         link = nicparams[constants.NIC_LINK]
8243         args['nics'].append((ip, mac, mode, link))
8244       if constants.DDM_ADD in nic_override:
8245         ip = nic_override[constants.DDM_ADD].get('ip', None)
8246         mac = nic_override[constants.DDM_ADD]['mac']
8247         nicparams = self.nic_pnew[constants.DDM_ADD]
8248         mode = nicparams[constants.NIC_MODE]
8249         link = nicparams[constants.NIC_LINK]
8250         args['nics'].append((ip, mac, mode, link))
8251       elif constants.DDM_REMOVE in nic_override:
8252         del args['nics'][-1]
8253
8254     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8255     if self.op.disk_template:
8256       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8257     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8258     return env, nl, nl
8259
8260   @staticmethod
8261   def _GetUpdatedParams(old_params, update_dict,
8262                         default_values, parameter_types):
8263     """Return the new params dict for the given params.
8264
8265     @type old_params: dict
8266     @param old_params: old parameters
8267     @type update_dict: dict
8268     @param update_dict: dict containing new parameter values,
8269                         or constants.VALUE_DEFAULT to reset the
8270                         parameter to its default value
8271     @type default_values: dict
8272     @param default_values: default values for the filled parameters
8273     @type parameter_types: dict
8274     @param parameter_types: dict mapping target dict keys to types
8275                             in constants.ENFORCEABLE_TYPES
8276     @rtype: (dict, dict)
8277     @return: (new_parameters, filled_parameters)
8278
8279     """
8280     params_copy = copy.deepcopy(old_params)
8281     for key, val in update_dict.iteritems():
8282       if val == constants.VALUE_DEFAULT:
8283         try:
8284           del params_copy[key]
8285         except KeyError:
8286           pass
8287       else:
8288         params_copy[key] = val
8289     utils.ForceDictType(params_copy, parameter_types)
8290     params_filled = objects.FillDict(default_values, params_copy)
8291     return (params_copy, params_filled)
8292
8293   def CheckPrereq(self):
8294     """Check prerequisites.
8295
8296     This only checks the instance list against the existing names.
8297
8298     """
8299     self.force = self.op.force
8300
8301     # checking the new params on the primary/secondary nodes
8302
8303     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8304     cluster = self.cluster = self.cfg.GetClusterInfo()
8305     assert self.instance is not None, \
8306       "Cannot retrieve locked instance %s" % self.op.instance_name
8307     pnode = instance.primary_node
8308     nodelist = list(instance.all_nodes)
8309
8310     if self.op.disk_template:
8311       if instance.disk_template == self.op.disk_template:
8312         raise errors.OpPrereqError("Instance already has disk template %s" %
8313                                    instance.disk_template, errors.ECODE_INVAL)
8314
8315       if (instance.disk_template,
8316           self.op.disk_template) not in self._DISK_CONVERSIONS:
8317         raise errors.OpPrereqError("Unsupported disk template conversion from"
8318                                    " %s to %s" % (instance.disk_template,
8319                                                   self.op.disk_template),
8320                                    errors.ECODE_INVAL)
8321       if self.op.disk_template in constants.DTS_NET_MIRROR:
8322         _CheckNodeOnline(self, self.op.remote_node)
8323         _CheckNodeNotDrained(self, self.op.remote_node)
8324         disks = [{"size": d.size} for d in instance.disks]
8325         required = _ComputeDiskSize(self.op.disk_template, disks)
8326         _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8327         _CheckInstanceDown(self, instance, "cannot change disk template")
8328
8329     # hvparams processing
8330     if self.op.hvparams:
8331       i_hvdict, hv_new = self._GetUpdatedParams(
8332                              instance.hvparams, self.op.hvparams,
8333                              cluster.hvparams[instance.hypervisor],
8334                              constants.HVS_PARAMETER_TYPES)
8335       # local check
8336       hypervisor.GetHypervisor(
8337         instance.hypervisor).CheckParameterSyntax(hv_new)
8338       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8339       self.hv_new = hv_new # the new actual values
8340       self.hv_inst = i_hvdict # the new dict (without defaults)
8341     else:
8342       self.hv_new = self.hv_inst = {}
8343
8344     # beparams processing
8345     if self.op.beparams:
8346       i_bedict, be_new = self._GetUpdatedParams(
8347                              instance.beparams, self.op.beparams,
8348                              cluster.beparams[constants.PP_DEFAULT],
8349                              constants.BES_PARAMETER_TYPES)
8350       self.be_new = be_new # the new actual values
8351       self.be_inst = i_bedict # the new dict (without defaults)
8352     else:
8353       self.be_new = self.be_inst = {}
8354
8355     self.warn = []
8356
8357     if constants.BE_MEMORY in self.op.beparams and not self.force:
8358       mem_check_list = [pnode]
8359       if be_new[constants.BE_AUTO_BALANCE]:
8360         # either we changed auto_balance to yes or it was from before
8361         mem_check_list.extend(instance.secondary_nodes)
8362       instance_info = self.rpc.call_instance_info(pnode, instance.name,
8363                                                   instance.hypervisor)
8364       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8365                                          instance.hypervisor)
8366       pninfo = nodeinfo[pnode]
8367       msg = pninfo.fail_msg
8368       if msg:
8369         # Assume the primary node is unreachable and go ahead
8370         self.warn.append("Can't get info from primary node %s: %s" %
8371                          (pnode,  msg))
8372       elif not isinstance(pninfo.payload.get('memory_free', None), int):
8373         self.warn.append("Node data from primary node %s doesn't contain"
8374                          " free memory information" % pnode)
8375       elif instance_info.fail_msg:
8376         self.warn.append("Can't get instance runtime information: %s" %
8377                         instance_info.fail_msg)
8378       else:
8379         if instance_info.payload:
8380           current_mem = int(instance_info.payload['memory'])
8381         else:
8382           # Assume instance not running
8383           # (there is a slight race condition here, but it's not very probable,
8384           # and we have no other way to check)
8385           current_mem = 0
8386         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8387                     pninfo.payload['memory_free'])
8388         if miss_mem > 0:
8389           raise errors.OpPrereqError("This change will prevent the instance"
8390                                      " from starting, due to %d MB of memory"
8391                                      " missing on its primary node" % miss_mem,
8392                                      errors.ECODE_NORES)
8393
8394       if be_new[constants.BE_AUTO_BALANCE]:
8395         for node, nres in nodeinfo.items():
8396           if node not in instance.secondary_nodes:
8397             continue
8398           msg = nres.fail_msg
8399           if msg:
8400             self.warn.append("Can't get info from secondary node %s: %s" %
8401                              (node, msg))
8402           elif not isinstance(nres.payload.get('memory_free', None), int):
8403             self.warn.append("Secondary node %s didn't return free"
8404                              " memory information" % node)
8405           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8406             self.warn.append("Not enough memory to failover instance to"
8407                              " secondary node %s" % node)
8408
8409     # NIC processing
8410     self.nic_pnew = {}
8411     self.nic_pinst = {}
8412     for nic_op, nic_dict in self.op.nics:
8413       if nic_op == constants.DDM_REMOVE:
8414         if not instance.nics:
8415           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8416                                      errors.ECODE_INVAL)
8417         continue
8418       if nic_op != constants.DDM_ADD:
8419         # an existing nic
8420         if not instance.nics:
8421           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8422                                      " no NICs" % nic_op,
8423                                      errors.ECODE_INVAL)
8424         if nic_op < 0 or nic_op >= len(instance.nics):
8425           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8426                                      " are 0 to %d" %
8427                                      (nic_op, len(instance.nics) - 1),
8428                                      errors.ECODE_INVAL)
8429         old_nic_params = instance.nics[nic_op].nicparams
8430         old_nic_ip = instance.nics[nic_op].ip
8431       else:
8432         old_nic_params = {}
8433         old_nic_ip = None
8434
8435       update_params_dict = dict([(key, nic_dict[key])
8436                                  for key in constants.NICS_PARAMETERS
8437                                  if key in nic_dict])
8438
8439       if 'bridge' in nic_dict:
8440         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8441
8442       new_nic_params, new_filled_nic_params = \
8443           self._GetUpdatedParams(old_nic_params, update_params_dict,
8444                                  cluster.nicparams[constants.PP_DEFAULT],
8445                                  constants.NICS_PARAMETER_TYPES)
8446       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8447       self.nic_pinst[nic_op] = new_nic_params
8448       self.nic_pnew[nic_op] = new_filled_nic_params
8449       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8450
8451       if new_nic_mode == constants.NIC_MODE_BRIDGED:
8452         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8453         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8454         if msg:
8455           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8456           if self.force:
8457             self.warn.append(msg)
8458           else:
8459             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8460       if new_nic_mode == constants.NIC_MODE_ROUTED:
8461         if 'ip' in nic_dict:
8462           nic_ip = nic_dict['ip']
8463         else:
8464           nic_ip = old_nic_ip
8465         if nic_ip is None:
8466           raise errors.OpPrereqError('Cannot set the nic ip to None'
8467                                      ' on a routed nic', errors.ECODE_INVAL)
8468       if 'mac' in nic_dict:
8469         nic_mac = nic_dict['mac']
8470         if nic_mac is None:
8471           raise errors.OpPrereqError('Cannot set the nic mac to None',
8472                                      errors.ECODE_INVAL)
8473         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8474           # otherwise generate the mac
8475           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8476         else:
8477           # or validate/reserve the current one
8478           try:
8479             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8480           except errors.ReservationError:
8481             raise errors.OpPrereqError("MAC address %s already in use"
8482                                        " in cluster" % nic_mac,
8483                                        errors.ECODE_NOTUNIQUE)
8484
8485     # DISK processing
8486     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8487       raise errors.OpPrereqError("Disk operations not supported for"
8488                                  " diskless instances",
8489                                  errors.ECODE_INVAL)
8490     for disk_op, _ in self.op.disks:
8491       if disk_op == constants.DDM_REMOVE:
8492         if len(instance.disks) == 1:
8493           raise errors.OpPrereqError("Cannot remove the last disk of"
8494                                      " an instance", errors.ECODE_INVAL)
8495         _CheckInstanceDown(self, instance, "cannot remove disks")
8496
8497       if (disk_op == constants.DDM_ADD and
8498           len(instance.nics) >= constants.MAX_DISKS):
8499         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8500                                    " add more" % constants.MAX_DISKS,
8501                                    errors.ECODE_STATE)
8502       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8503         # an existing disk
8504         if disk_op < 0 or disk_op >= len(instance.disks):
8505           raise errors.OpPrereqError("Invalid disk index %s, valid values"
8506                                      " are 0 to %d" %
8507                                      (disk_op, len(instance.disks)),
8508                                      errors.ECODE_INVAL)
8509
8510     # OS change
8511     if self.op.os_name and not self.op.force:
8512       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8513                       self.op.force_variant)
8514
8515     return
8516
8517   def _ConvertPlainToDrbd(self, feedback_fn):
8518     """Converts an instance from plain to drbd.
8519
8520     """
8521     feedback_fn("Converting template to drbd")
8522     instance = self.instance
8523     pnode = instance.primary_node
8524     snode = self.op.remote_node
8525
8526     # create a fake disk info for _GenerateDiskTemplate
8527     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8528     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8529                                       instance.name, pnode, [snode],
8530                                       disk_info, None, None, 0)
8531     info = _GetInstanceInfoText(instance)
8532     feedback_fn("Creating aditional volumes...")
8533     # first, create the missing data and meta devices
8534     for disk in new_disks:
8535       # unfortunately this is... not too nice
8536       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8537                             info, True)
8538       for child in disk.children:
8539         _CreateSingleBlockDev(self, snode, instance, child, info, True)
8540     # at this stage, all new LVs have been created, we can rename the
8541     # old ones
8542     feedback_fn("Renaming original volumes...")
8543     rename_list = [(o, n.children[0].logical_id)
8544                    for (o, n) in zip(instance.disks, new_disks)]
8545     result = self.rpc.call_blockdev_rename(pnode, rename_list)
8546     result.Raise("Failed to rename original LVs")
8547
8548     feedback_fn("Initializing DRBD devices...")
8549     # all child devices are in place, we can now create the DRBD devices
8550     for disk in new_disks:
8551       for node in [pnode, snode]:
8552         f_create = node == pnode
8553         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8554
8555     # at this point, the instance has been modified
8556     instance.disk_template = constants.DT_DRBD8
8557     instance.disks = new_disks
8558     self.cfg.Update(instance, feedback_fn)
8559
8560     # disks are created, waiting for sync
8561     disk_abort = not _WaitForSync(self, instance)
8562     if disk_abort:
8563       raise errors.OpExecError("There are some degraded disks for"
8564                                " this instance, please cleanup manually")
8565
8566   def _ConvertDrbdToPlain(self, feedback_fn):
8567     """Converts an instance from drbd to plain.
8568
8569     """
8570     instance = self.instance
8571     assert len(instance.secondary_nodes) == 1
8572     pnode = instance.primary_node
8573     snode = instance.secondary_nodes[0]
8574     feedback_fn("Converting template to plain")
8575
8576     old_disks = instance.disks
8577     new_disks = [d.children[0] for d in old_disks]
8578
8579     # copy over size and mode
8580     for parent, child in zip(old_disks, new_disks):
8581       child.size = parent.size
8582       child.mode = parent.mode
8583
8584     # update instance structure
8585     instance.disks = new_disks
8586     instance.disk_template = constants.DT_PLAIN
8587     self.cfg.Update(instance, feedback_fn)
8588
8589     feedback_fn("Removing volumes on the secondary node...")
8590     for disk in old_disks:
8591       self.cfg.SetDiskID(disk, snode)
8592       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8593       if msg:
8594         self.LogWarning("Could not remove block device %s on node %s,"
8595                         " continuing anyway: %s", disk.iv_name, snode, msg)
8596
8597     feedback_fn("Removing unneeded volumes on the primary node...")
8598     for idx, disk in enumerate(old_disks):
8599       meta = disk.children[1]
8600       self.cfg.SetDiskID(meta, pnode)
8601       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8602       if msg:
8603         self.LogWarning("Could not remove metadata for disk %d on node %s,"
8604                         " continuing anyway: %s", idx, pnode, msg)
8605
8606
8607   def Exec(self, feedback_fn):
8608     """Modifies an instance.
8609
8610     All parameters take effect only at the next restart of the instance.
8611
8612     """
8613     # Process here the warnings from CheckPrereq, as we don't have a
8614     # feedback_fn there.
8615     for warn in self.warn:
8616       feedback_fn("WARNING: %s" % warn)
8617
8618     result = []
8619     instance = self.instance
8620     # disk changes
8621     for disk_op, disk_dict in self.op.disks:
8622       if disk_op == constants.DDM_REMOVE:
8623         # remove the last disk
8624         device = instance.disks.pop()
8625         device_idx = len(instance.disks)
8626         for node, disk in device.ComputeNodeTree(instance.primary_node):
8627           self.cfg.SetDiskID(disk, node)
8628           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8629           if msg:
8630             self.LogWarning("Could not remove disk/%d on node %s: %s,"
8631                             " continuing anyway", device_idx, node, msg)
8632         result.append(("disk/%d" % device_idx, "remove"))
8633       elif disk_op == constants.DDM_ADD:
8634         # add a new disk
8635         if instance.disk_template == constants.DT_FILE:
8636           file_driver, file_path = instance.disks[0].logical_id
8637           file_path = os.path.dirname(file_path)
8638         else:
8639           file_driver = file_path = None
8640         disk_idx_base = len(instance.disks)
8641         new_disk = _GenerateDiskTemplate(self,
8642                                          instance.disk_template,
8643                                          instance.name, instance.primary_node,
8644                                          instance.secondary_nodes,
8645                                          [disk_dict],
8646                                          file_path,
8647                                          file_driver,
8648                                          disk_idx_base)[0]
8649         instance.disks.append(new_disk)
8650         info = _GetInstanceInfoText(instance)
8651
8652         logging.info("Creating volume %s for instance %s",
8653                      new_disk.iv_name, instance.name)
8654         # Note: this needs to be kept in sync with _CreateDisks
8655         #HARDCODE
8656         for node in instance.all_nodes:
8657           f_create = node == instance.primary_node
8658           try:
8659             _CreateBlockDev(self, node, instance, new_disk,
8660                             f_create, info, f_create)
8661           except errors.OpExecError, err:
8662             self.LogWarning("Failed to create volume %s (%s) on"
8663                             " node %s: %s",
8664                             new_disk.iv_name, new_disk, node, err)
8665         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8666                        (new_disk.size, new_disk.mode)))
8667       else:
8668         # change a given disk
8669         instance.disks[disk_op].mode = disk_dict['mode']
8670         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8671
8672     if self.op.disk_template:
8673       r_shut = _ShutdownInstanceDisks(self, instance)
8674       if not r_shut:
8675         raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8676                                  " proceed with disk template conversion")
8677       mode = (instance.disk_template, self.op.disk_template)
8678       try:
8679         self._DISK_CONVERSIONS[mode](self, feedback_fn)
8680       except:
8681         self.cfg.ReleaseDRBDMinors(instance.name)
8682         raise
8683       result.append(("disk_template", self.op.disk_template))
8684
8685     # NIC changes
8686     for nic_op, nic_dict in self.op.nics:
8687       if nic_op == constants.DDM_REMOVE:
8688         # remove the last nic
8689         del instance.nics[-1]
8690         result.append(("nic.%d" % len(instance.nics), "remove"))
8691       elif nic_op == constants.DDM_ADD:
8692         # mac and bridge should be set, by now
8693         mac = nic_dict['mac']
8694         ip = nic_dict.get('ip', None)
8695         nicparams = self.nic_pinst[constants.DDM_ADD]
8696         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8697         instance.nics.append(new_nic)
8698         result.append(("nic.%d" % (len(instance.nics) - 1),
8699                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
8700                        (new_nic.mac, new_nic.ip,
8701                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8702                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8703                        )))
8704       else:
8705         for key in 'mac', 'ip':
8706           if key in nic_dict:
8707             setattr(instance.nics[nic_op], key, nic_dict[key])
8708         if nic_op in self.nic_pinst:
8709           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8710         for key, val in nic_dict.iteritems():
8711           result.append(("nic.%s/%d" % (key, nic_op), val))
8712
8713     # hvparams changes
8714     if self.op.hvparams:
8715       instance.hvparams = self.hv_inst
8716       for key, val in self.op.hvparams.iteritems():
8717         result.append(("hv/%s" % key, val))
8718
8719     # beparams changes
8720     if self.op.beparams:
8721       instance.beparams = self.be_inst
8722       for key, val in self.op.beparams.iteritems():
8723         result.append(("be/%s" % key, val))
8724
8725     # OS change
8726     if self.op.os_name:
8727       instance.os = self.op.os_name
8728
8729     self.cfg.Update(instance, feedback_fn)
8730
8731     return result
8732
  # Maps a (current disk template, requested disk template) pair to the
  # method performing that conversion; Exec looks the pair up and calls
  # the method as fn(self, feedback_fn).
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
8737
class LUQueryExports(NoHooksLU):
  """Logical unit listing the exports found on a set of nodes.

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    """Declare shared node locks.

    When the opcode names no nodes, all nodes are locked.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Remember the nodes for which locks were acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is the payload returned by the node (the
        instances exported on it), or False if querying it failed.

    """
    exports = {}
    for node, nres in self.rpc.call_export_list(self.nodes).items():
      if nres.fail_msg:
        # the query failed on this node
        exports[node] = False
      else:
        exports[node] = nres.payload
    return exports
8778
8779
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  The instance disks are snapshotted on the primary node, the snapshots
  are exported to the target node and older exports of the same
  instance are removed from the other nodes.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Stores the shutdown timeout, falling back to
    constants.DEFAULT_SHUTDOWN_TIMEOUT when the opcode has none.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Lock the instance and all nodes.

    """
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    @return: tuple of (environment dict, node list, node list); the same
        node list is returned twice

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid, that the
    primary node is online and that the target node is online and not
    drained.

    @raise errors.OpPrereqError: if the instance has file-based disks

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
    assert self.dst_node is not None

    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    @param feedback_fn: function used to send feedback back to the caller
    @rtype: tuple
    @return: (fin_resu, dresults), where fin_resu tells whether
        finalizing the export succeeded and dresults is a list with one
        boolean per instance disk telling whether that disk was exported
    @raise errors.OpExecError: if the instance cannot be started again
        after the snapshots were taken

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    # one entry per instance disk: the snapshot device, or False if the
    # snapshot could not be created
    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      # per-disk results
      dresults = []
      try:
        for idx, disk in enumerate(instance.disks):
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
                      (idx, src_node))

          # result.payload will be a snapshot of an lvm leaf of the one we
          # passed
          result = self.rpc.call_blockdev_snapshot(src_node, disk)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                            idx, src_node, msg)
            snap_disks.append(False)
          else:
            disk_id = (vgname, result.payload)
            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                   logical_id=disk_id, physical_id=disk_id,
                                   iv_name=disk.iv_name)
            snap_disks.append(new_dev)

      finally:
        # restart the instance if we shut it down above and it is marked
        # as up, whether or not the snapshots succeeded
        if self.op.shutdown and instance.admin_up:
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

      # TODO: check for size

      cluster_name = self.cfg.GetClusterName()
      for idx, dev in enumerate(snap_disks):
        feedback_fn("Exporting snapshot %s from %s to %s" %
                    (idx, src_node, dst_node.name))
        if dev:
          # FIXME: pass debug from opcode to backend
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                 instance, cluster_name,
                                                 idx, self.op.debug_level)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not export disk/%s from node %s to"
                            " node %s: %s", idx, src_node, dst_node.name, msg)
            dresults.append(False)
          else:
            dresults.append(True)
          # the snapshot is removed whether or not the export succeeded
          msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
          if msg:
            self.LogWarning("Could not remove snapshot for disk/%d from node"
                            " %s: %s", idx, src_node, msg)
        else:
          # no snapshot was created for this disk
          dresults.append(False)

      feedback_fn("Finalizing export on %s" % dst_node.name)
      result = self.rpc.call_finalize_export(dst_node.name, instance,
                                             snap_disks)
      fin_resu = True
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not finalize export for instance %s"
                        " on node %s: %s", instance.name, dst_node.name, msg)
        fin_resu = False

    finally:
      # undo the disk activation done above for a stopped instance
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        # nodes which could not be queried are silently skipped
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
    return fin_resu, dresults
8976
8977
class LURemoveExport(NoHooksLU):
  """Logical unit deleting every export of a named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock all nodes.

    """
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites; there are none for this LU.

    """

  def Exec(self, feedback_fn):
    """Remove any export of the instance from the locked nodes.

    Nodes that cannot be queried are skipped with a warning; a failed
    removal is only logged.

    """
    expanded = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    name_unknown = not expanded
    if name_unknown:
      export_name = self.op.instance_name
    else:
      export_name = expanded

    node_names = self.acquired_locks[locking.LEVEL_NODE]
    removed_any = False
    for node, nres in self.rpc.call_export_list(node_names).items():
      query_err = nres.fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_err)
        continue
      if export_name not in nres.payload:
        continue
      removed_any = True
      remove_err = self.rpc.call_export_remove(node, export_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", export_name, node, remove_err)

    if name_unknown and not removed_any:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
9029
9030
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  Abstract parent of the tag-handling LUs: it locks the tagged object
  and looks it up, storing it in self.target.

  """

  def ExpandNames(self):
    """Expand the object name and lock it (nodes and instances only).

    """
    self.needed_locks = {}
    kind = self.op.kind
    if kind == constants.TAG_NODE:
      full_name = _ExpandNodeName(self.cfg, self.op.name)
      self.op.name = full_name
      self.needed_locks[locking.LEVEL_NODE] = full_name
    elif kind == constants.TAG_INSTANCE:
      full_name = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = full_name
      self.needed_locks[locking.LEVEL_INSTANCE] = full_name

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the cluster, node or instance object selected by the
    opcode and saves it as self.target.

    @raise errors.OpPrereqError: if the tag kind is not a known one

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)
    self.target = target
9060
9061
class LUGetTags(TagsLU):
  """Logical unit returning the tags of one object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Return the tags of the target object.

    @rtype: list
    @return: the tags of self.target, copied into a new list

    """
    tags = self.target.GetTags()
    return list(tags)
9074
9075
9076 class LUSearchTags(NoHooksLU):
9077   """Searches the tags for a given pattern.
9078
9079   """
9080   _OP_REQP = ["pattern"]
9081   REQ_BGL = False
9082
9083   def ExpandNames(self):
9084     self.needed_locks = {}
9085
9086   def CheckPrereq(self):
9087     """Check prerequisites.
9088
9089     This checks the pattern passed for validity by compiling it.
9090
9091     """
9092     try:
9093       self.re = re.compile(self.op.pattern)
9094     except re.error, err:
9095       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9096                                  (self.op.pattern, err), errors.ECODE_INVAL)
9097
9098   def Exec(self, feedback_fn):
9099     """Returns the tag list.
9100
9101     """
9102     cfg = self.cfg
9103     tgts = [("/cluster", cfg.GetClusterInfo())]
9104     ilist = cfg.GetAllInstancesInfo().values()
9105     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9106     nlist = cfg.GetAllNodesInfo().values()
9107     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9108     results = []
9109     for path, target in tgts:
9110       for tag in target.GetTags():
9111         if self.re.search(tag):
9112           results.append((path, tag))
9113     return results
9114
9115
9116 class LUAddTags(TagsLU):
9117   """Sets a tag on a given object.
9118
9119   """
9120   _OP_REQP = ["kind", "name", "tags"]
9121   REQ_BGL = False
9122
9123   def CheckPrereq(self):
9124     """Check prerequisites.
9125
9126     This checks the type and length of the tag name and value.
9127
9128     """
9129     TagsLU.CheckPrereq(self)
9130     for tag in self.op.tags:
9131       objects.TaggableObject.ValidateTag(tag)
9132
9133   def Exec(self, feedback_fn):
9134     """Sets the tag.
9135
9136     """
9137     try:
9138       for tag in self.op.tags:
9139         self.target.AddTag(tag)
9140     except errors.TagError, err:
9141       raise errors.OpExecError("Error while setting tag: %s" % str(err))
9142     self.cfg.Update(self.target, feedback_fn)
9143
9144
class LUDelTags(TagsLU):
  """Logical unit removing a list of tags from one object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Every requested tag must be valid and currently set on the target.

    @raise errors.OpPrereqError: if some of the tags are not set on the
        target; the message lists them, quoted and sorted

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    requested = frozenset(self.op.tags)
    missing = requested - self.target.GetTags()
    if missing:
      quoted = sorted(["'%s'" % tag for tag in missing])
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(quoted)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the target and save it.

    """
    target = self.target
    for tag in self.op.tags:
      target.RemoveTag(tag)
    self.cfg.Update(target, feedback_fn)
9177
9178
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  The sleep is done on the master, on a list of nodes, or on both,
  as requested by the opcode.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    The node list, if there is one, is expanded and its nodes locked.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return
    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    wanted = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = wanted
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites; there are none for this LU.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    @raise errors.OpExecError: if the delay on the master fails

    """
    duration = self.op.duration
    if self.op.on_master and not utils.TestDelay(duration):
      raise errors.OpExecError("Error during master delay test")
    nodes = self.op.on_nodes
    if nodes:
      per_node = self.rpc.call_test_delay(nodes, duration)
      for node, node_result in per_node.items():
        node_result.Raise("Failure during rpc call to node %s" % node)
9219
9220
9221 class IAllocator(object):
9222   """IAllocator framework.
9223
  An IAllocator instance has four sets of attributes:
    - cfg and rpc, that are needed to query the cluster
    - input data (all members of the mode-specific _ALLO_KEYS,
      _RELO_KEYS or _EVAC_KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage
9232
9233   """
9234   # pylint: disable-msg=R0902
9235   # lots of instance attributes
9236   _ALLO_KEYS = [
9237     "name", "mem_size", "disks", "disk_template",
9238     "os", "tags", "nics", "vcpus", "hypervisor",
9239     ]
9240   _RELO_KEYS = [
9241     "name", "relocate_from",
9242     ]
9243   _EVAC_KEYS = [
9244     "evac_nodes",
9245     ]
9246
9247   def __init__(self, cfg, rpc, mode, **kwargs):
9248     self.cfg = cfg
9249     self.rpc = rpc
9250     # init buffer variables
9251     self.in_text = self.out_text = self.in_data = self.out_data = None
9252     # init all input fields so that pylint is happy
9253     self.mode = mode
9254     self.mem_size = self.disks = self.disk_template = None
9255     self.os = self.tags = self.nics = self.vcpus = None
9256     self.hypervisor = None
9257     self.relocate_from = None
9258     self.name = None
9259     self.evac_nodes = None
9260     # computed fields
9261     self.required_nodes = None
9262     # init result fields
9263     self.success = self.info = self.result = None
9264     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9265       keyset = self._ALLO_KEYS
9266       fn = self._AddNewInstance
9267     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9268       keyset = self._RELO_KEYS
9269       fn = self._AddRelocateInstance
9270     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9271       keyset = self._EVAC_KEYS
9272       fn = self._AddEvacuateNodes
9273     else:
9274       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9275                                    " IAllocator" % self.mode)
9276     for key in kwargs:
9277       if key not in keyset:
9278         raise errors.ProgrammerError("Invalid input parameter '%s' to"
9279                                      " IAllocator" % key)
9280       setattr(self, key, kwargs[key])
9281
9282     for key in keyset:
9283       if key not in kwargs:
9284         raise errors.ProgrammerError("Missing input parameter '%s' to"
9285                                      " IAllocator" % key)
9286     self._BuildInputData(fn)
9287
9288   def _ComputeClusterData(self):
9289     """Compute the generic allocator input data.
9290
9291     This is the data that is independent of the actual operation.
9292
9293     """
9294     cfg = self.cfg
9295     cluster_info = cfg.GetClusterInfo()
9296     # cluster data
9297     data = {
9298       "version": constants.IALLOCATOR_VERSION,
9299       "cluster_name": cfg.GetClusterName(),
9300       "cluster_tags": list(cluster_info.GetTags()),
9301       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9302       # we don't have job IDs
9303       }
9304     iinfo = cfg.GetAllInstancesInfo().values()
9305     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9306
9307     # node data
9308     node_results = {}
9309     node_list = cfg.GetNodeList()
9310
9311     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9312       hypervisor_name = self.hypervisor
9313     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9314       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9315     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9316       hypervisor_name = cluster_info.enabled_hypervisors[0]
9317
9318     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9319                                         hypervisor_name)
9320     node_iinfo = \
9321       self.rpc.call_all_instances_info(node_list,
9322                                        cluster_info.enabled_hypervisors)
9323     for nname, nresult in node_data.items():
9324       # first fill in static (config-based) values
9325       ninfo = cfg.GetNodeInfo(nname)
9326       pnr = {
9327         "tags": list(ninfo.GetTags()),
9328         "primary_ip": ninfo.primary_ip,
9329         "secondary_ip": ninfo.secondary_ip,
9330         "offline": ninfo.offline,
9331         "drained": ninfo.drained,
9332         "master_candidate": ninfo.master_candidate,
9333         }
9334
9335       if not (ninfo.offline or ninfo.drained):
9336         nresult.Raise("Can't get data for node %s" % nname)
9337         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9338                                 nname)
9339         remote_info = nresult.payload
9340
9341         for attr in ['memory_total', 'memory_free', 'memory_dom0',
9342                      'vg_size', 'vg_free', 'cpu_total']:
9343           if attr not in remote_info:
9344             raise errors.OpExecError("Node '%s' didn't return attribute"
9345                                      " '%s'" % (nname, attr))
9346           if not isinstance(remote_info[attr], int):
9347             raise errors.OpExecError("Node '%s' returned invalid value"
9348                                      " for '%s': %s" %
9349                                      (nname, attr, remote_info[attr]))
9350         # compute memory used by primary instances
9351         i_p_mem = i_p_up_mem = 0
9352         for iinfo, beinfo in i_list:
9353           if iinfo.primary_node == nname:
9354             i_p_mem += beinfo[constants.BE_MEMORY]
9355             if iinfo.name not in node_iinfo[nname].payload:
9356               i_used_mem = 0
9357             else:
9358               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9359             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9360             remote_info['memory_free'] -= max(0, i_mem_diff)
9361
9362             if iinfo.admin_up:
9363               i_p_up_mem += beinfo[constants.BE_MEMORY]
9364
9365         # compute memory used by instances
9366         pnr_dyn = {
9367           "total_memory": remote_info['memory_total'],
9368           "reserved_memory": remote_info['memory_dom0'],
9369           "free_memory": remote_info['memory_free'],
9370           "total_disk": remote_info['vg_size'],
9371           "free_disk": remote_info['vg_free'],
9372           "total_cpus": remote_info['cpu_total'],
9373           "i_pri_memory": i_p_mem,
9374           "i_pri_up_memory": i_p_up_mem,
9375           }
9376         pnr.update(pnr_dyn)
9377
9378       node_results[nname] = pnr
9379     data["nodes"] = node_results
9380
9381     # instance data
9382     instance_data = {}
9383     for iinfo, beinfo in i_list:
9384       nic_data = []
9385       for nic in iinfo.nics:
9386         filled_params = objects.FillDict(
9387             cluster_info.nicparams[constants.PP_DEFAULT],
9388             nic.nicparams)
9389         nic_dict = {"mac": nic.mac,
9390                     "ip": nic.ip,
9391                     "mode": filled_params[constants.NIC_MODE],
9392                     "link": filled_params[constants.NIC_LINK],
9393                    }
9394         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9395           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9396         nic_data.append(nic_dict)
9397       pir = {
9398         "tags": list(iinfo.GetTags()),
9399         "admin_up": iinfo.admin_up,
9400         "vcpus": beinfo[constants.BE_VCPUS],
9401         "memory": beinfo[constants.BE_MEMORY],
9402         "os": iinfo.os,
9403         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9404         "nics": nic_data,
9405         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9406         "disk_template": iinfo.disk_template,
9407         "hypervisor": iinfo.hypervisor,
9408         }
9409       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9410                                                  pir["disks"])
9411       instance_data[iinfo.name] = pir
9412
9413     data["instances"] = instance_data
9414
9415     self.in_data = data
9416
9417   def _AddNewInstance(self):
9418     """Add new instance data to allocator structure.
9419
9420     This in combination with _AllocatorGetClusterData will create the
9421     correct structure needed as input for the allocator.
9422
9423     The checks for the completeness of the opcode must have already been
9424     done.
9425
9426     """
9427     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9428
9429     if self.disk_template in constants.DTS_NET_MIRROR:
9430       self.required_nodes = 2
9431     else:
9432       self.required_nodes = 1
9433     request = {
9434       "name": self.name,
9435       "disk_template": self.disk_template,
9436       "tags": self.tags,
9437       "os": self.os,
9438       "vcpus": self.vcpus,
9439       "memory": self.mem_size,
9440       "disks": self.disks,
9441       "disk_space_total": disk_space,
9442       "nics": self.nics,
9443       "required_nodes": self.required_nodes,
9444       }
9445     return request
9446
9447   def _AddRelocateInstance(self):
9448     """Add relocate instance data to allocator structure.
9449
9450     This in combination with _IAllocatorGetClusterData will create the
9451     correct structure needed as input for the allocator.
9452
9453     The checks for the completeness of the opcode must have already been
9454     done.
9455
9456     """
9457     instance = self.cfg.GetInstanceInfo(self.name)
9458     if instance is None:
9459       raise errors.ProgrammerError("Unknown instance '%s' passed to"
9460                                    " IAllocator" % self.name)
9461
9462     if instance.disk_template not in constants.DTS_NET_MIRROR:
9463       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9464                                  errors.ECODE_INVAL)
9465
9466     if len(instance.secondary_nodes) != 1:
9467       raise errors.OpPrereqError("Instance has not exactly one secondary node",
9468                                  errors.ECODE_STATE)
9469
9470     self.required_nodes = 1
9471     disk_sizes = [{'size': disk.size} for disk in instance.disks]
9472     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9473
9474     request = {
9475       "name": self.name,
9476       "disk_space_total": disk_space,
9477       "required_nodes": self.required_nodes,
9478       "relocate_from": self.relocate_from,
9479       }
9480     return request
9481
9482   def _AddEvacuateNodes(self):
9483     """Add evacuate nodes data to allocator structure.
9484
9485     """
9486     request = {
9487       "evac_nodes": self.evac_nodes
9488       }
9489     return request
9490
9491   def _BuildInputData(self, fn):
9492     """Build input data structures.
9493
9494     """
9495     self._ComputeClusterData()
9496
9497     request = fn()
9498     request["type"] = self.mode
9499     self.in_data["request"] = request
9500
9501     self.in_text = serializer.Dump(self.in_data)
9502
9503   def Run(self, name, validate=True, call_fn=None):
9504     """Run an instance allocator and return the results.
9505
9506     """
9507     if call_fn is None:
9508       call_fn = self.rpc.call_iallocator_runner
9509
9510     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9511     result.Raise("Failure while running the iallocator script")
9512
9513     self.out_text = result.payload
9514     if validate:
9515       self._ValidateResult()
9516
9517   def _ValidateResult(self):
9518     """Process the allocator results.
9519
9520     This will process and if successful save the result in
9521     self.out_data and the other parameters.
9522
9523     """
9524     try:
9525       rdict = serializer.Load(self.out_text)
9526     except Exception, err:
9527       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9528
9529     if not isinstance(rdict, dict):
9530       raise errors.OpExecError("Can't parse iallocator results: not a dict")
9531
9532     # TODO: remove backwards compatiblity in later versions
9533     if "nodes" in rdict and "result" not in rdict:
9534       rdict["result"] = rdict["nodes"]
9535       del rdict["nodes"]
9536
9537     for key in "success", "info", "result":
9538       if key not in rdict:
9539         raise errors.OpExecError("Can't parse iallocator results:"
9540                                  " missing key '%s'" % key)
9541       setattr(self, key, rdict[key])
9542
9543     if not isinstance(rdict["result"], list):
9544       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9545                                " is not a list")
9546     self.out_data = rdict
9547
9548
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  Depending on the requested direction, this LU either returns the input
  text built for the allocator (constants.IALLOCATOR_DIR_IN) or runs the
  named allocator and returns its raw, unvalidated output
  (constants.IALLOCATOR_DIR_OUT).

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and
    mode of the test:

      - allocation mode: all instance attributes must be present, the
        name must not expand to an existing instance, and 'nics' and
        'disks' must be lists of well-formed dicts; a missing or None
        hypervisor is replaced by self.cfg.GetHypervisorType()
      - relocation mode: the instance name is expanded and the secondary
        nodes of the instance are saved in self.relocate_from
      - multi-evacuate mode: only the presence of 'evac_nodes' is checked

    For the "out" direction an allocator name is required as well.

    Raises errors.OpPrereqError if any of the checks fails.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # The test allocates a new instance, so the name must be unused
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      # Every NIC must be a dict with the 'mac', 'ip' and 'bridge' keys
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # Every disk must be a dict with an integer 'size' and a 'mode'
      # of either 'r' or 'w'
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # Saved here for Exec, which passes it on to the IAllocator
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      # Running the allocator requires knowing which one to run
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    An IAllocator object is built from the opcode parameters matching
    the requested mode. For the "in" direction its generated input text
    is returned; otherwise the allocator named by self.op.allocator is
    run without validating its output, and the output text is returned.

    feedback_fn is not used by this LU.

    Raises errors.ProgrammerError if the mode is not one of the modes
    handled here.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      # The mode must be interpolated into the message; passing it as a
      # second constructor argument would leave a literal "%s" in the text
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result