code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 import os
  30 import os.path
  31 import time
  32 import re
  33 import platform
  34 import logging
  35 import copy
  36
  37 from ganeti import ssh
  38 from ganeti import utils
  39 from ganeti import errors
  40 from ganeti import hypervisor
  41 from ganeti import locking
  42 from ganeti import constants
  43 from ganeti import objects
  44 from ganeti import serializer
  45 from ganeti import ssconf
  46
  47
  48 class LogicalUnit(object):
  49   """Logical Unit base class.
  50
  51   Subclasses must follow these rules:
  52     - implement ExpandNames
  53     - implement CheckPrereq (except when tasklets are used)
  54     - implement Exec (except when tasklets are used)
  55     - implement BuildHooksEnv
  56     - redefine HPATH and HTYPE
  57     - optionally redefine their run requirements:
  58         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  59
  60   Note that all commands require root permissions.
  61
  62   @ivar dry_run_result: the value (if any) that will be returned to the caller
  63       in dry-run mode (signalled by opcode dry_run parameter)
  64
  65   """
  66   HPATH = None
  67   HTYPE = None
  68   _OP_REQP = []
  69   REQ_BGL = True
  70
  71   def __init__(self, processor, op, context, rpc):
  72     """Constructor for LogicalUnit.
  73
  74     This needs to be overridden in derived classes in order to check op
  75     validity.
  76
  77     """
  78     self.proc = processor
  79     self.op = op
  80     self.cfg = context.cfg
  81     self.context = context
  82     self.rpc = rpc
  83     # Dicts used to declare locking needs to mcpu
  84     self.needed_locks = None
  85     self.acquired_locks = {}
  86     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
  87     self.add_locks = {}
  88     self.remove_locks = {}
  89     # Used to force good behavior when calling helper functions
  90     self.recalculate_locks = {}
  91     self.__ssh = None
  92     # logging
  93     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
  94     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
  95     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
  96     # support for dry-run
  97     self.dry_run_result = None
  98     # support for generic debug attribute
  99     if (not hasattr(self.op, "debug_level") or
 100         not isinstance(self.op.debug_level, int)):
 101       self.op.debug_level = 0
 102
 103     # Tasklets
 104     self.tasklets = None
 105
 106     for attr_name in self._OP_REQP:
 107       attr_val = getattr(op, attr_name, None)
 108       if attr_val is None:
 109         raise errors.OpPrereqError("Required parameter '%s' missing" %
 110                                    attr_name, errors.ECODE_INVAL)
 111
 112     self.CheckArguments()
 113
 114   def __GetSSH(self):
 115     """Returns the SshRunner object
 116
 117     """
 118     if not self.__ssh:
 119       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 120     return self.__ssh
 121
 122   ssh = property(fget=__GetSSH)
 123
 124   def CheckArguments(self):
 125     """Check syntactic validity for the opcode arguments.
 126
 127     This method is for doing a simple syntactic check and ensure
 128     validity of opcode parameters, without any cluster-related
 129     checks. While the same can be accomplished in ExpandNames and/or
 130     CheckPrereq, doing these separate is better because:
 131
 132       - ExpandNames is left as as purely a lock-related function
 133       - CheckPrereq is run after we have acquired locks (and possible
 134         waited for them)
 135
 136     The function is allowed to change the self.op attribute so that
 137     later methods can no longer worry about missing parameters.
 138
 139     """
 140     pass
 141
 142   def ExpandNames(self):
 143     """Expand names for this LU.
 144
 145     This method is called before starting to execute the opcode, and it should
 146     update all the parameters of the opcode to their canonical form (e.g. a
 147     short node name must be fully expanded after this method has successfully
 148     completed). This way locking, hooks, logging, ecc. can work correctly.
 149
 150     LUs which implement this method must also populate the self.needed_locks
 151     member, as a dict with lock levels as keys, and a list of needed lock names
 152     as values. Rules:
 153
 154       - use an empty dict if you don't need any lock
 155       - if you don't need any lock at a particular level omit that level
 156       - don't put anything for the BGL level
 157       - if you want all locks at a level use locking.ALL_SET as a value
 158
 159     If you need to share locks (rather than acquire them exclusively) at one
 160     level you can modify self.share_locks, setting a true value (usually 1) for
 161     that level. By default locks are not shared.
 162
 163     This function can also define a list of tasklets, which then will be
 164     executed in order instead of the usual LU-level CheckPrereq and Exec
 165     functions, if those are not defined by the LU.
 166
 167     Examples::
 168
 169       # Acquire all nodes and one instance
 170       self.needed_locks = {
 171         locking.LEVEL_NODE: locking.ALL_SET,
 172         locking.LEVEL_INSTANCE: ['instance1.example.tld'],
 173       }
 174       # Acquire just two nodes
 175       self.needed_locks = {
 176         locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
 177       }
 178       # Acquire no locks
 179       self.needed_locks = {} # No, you can't leave it to the default value None
 180
 181     """
 182     # The implementation of this method is mandatory only if the new LU is
 183     # concurrent, so that old LUs don't need to be changed all at the same
 184     # time.
 185     if self.REQ_BGL:
 186       self.needed_locks = {} # Exclusive LUs don't need locks.
 187     else:
 188       raise NotImplementedError
 189
 190   def DeclareLocks(self, level):
 191     """Declare LU locking needs for a level
 192
 193     While most LUs can just declare their locking needs at ExpandNames time,
 194     sometimes there's the need to calculate some locks after having acquired
 195     the ones before. This function is called just before acquiring locks at a
 196     particular level, but after acquiring the ones at lower levels, and permits
 197     such calculations. It can be used to modify self.needed_locks, and by
 198     default it does nothing.
 199
 200     This function is only called if you have something already set in
 201     self.needed_locks for the level.
 202
 203     @param level: Locking level which is going to be locked
 204     @type level: member of ganeti.locking.LEVELS
 205
 206     """
 207
 208   def CheckPrereq(self):
 209     """Check prerequisites for this LU.
 210
 211     This method should check that the prerequisites for the execution
 212     of this LU are fulfilled. It can do internode communication, but
 213     it should be idempotent - no cluster or system changes are
 214     allowed.
 215
 216     The method should raise errors.OpPrereqError in case something is
 217     not fulfilled. Its return value is ignored.
 218
 219     This method should also update all the parameters of the opcode to
 220     their canonical form if it hasn't been done by ExpandNames before.
 221
 222     """
 223     if self.tasklets is not None:
 224       for (idx, tl) in enumerate(self.tasklets):
 225         logging.debug("Checking prerequisites for tasklet %s/%s",
 226                       idx + 1, len(self.tasklets))
 227         tl.CheckPrereq()
 228     else:
 229       raise NotImplementedError
 230
 231   def Exec(self, feedback_fn):
 232     """Execute the LU.
 233
 234     This method should implement the actual work. It should raise
 235     errors.OpExecError for failures that are somewhat dealt with in
 236     code, or expected.
 237
 238     """
 239     if self.tasklets is not None:
 240       for (idx, tl) in enumerate(self.tasklets):
 241         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 242         tl.Exec(feedback_fn)
 243     else:
 244       raise NotImplementedError
 245
 246   def BuildHooksEnv(self):
 247     """Build hooks environment for this LU.
 248
 249     This method should return a three-node tuple consisting of: a dict
 250     containing the environment that will be used for running the
 251     specific hook for this LU, a list of node names on which the hook
 252     should run before the execution, and a list of node names on which
 253     the hook should run after the execution.
 254
 255     The keys of the dict must not have 'GANETI_' prefixed as this will
 256     be handled in the hooks runner. Also note additional keys will be
 257     added by the hooks runner. If the LU doesn't define any
 258     environment, an empty dict (and not None) should be returned.
 259
 260     No nodes should be returned as an empty list (and not None).
 261
 262     Note that if the HPATH for a LU class is None, this function will
 263     not be called.
 264
 265     """
 266     raise NotImplementedError
 267
 268   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 269     """Notify the LU about the results of its hooks.
 270
 271     This method is called every time a hooks phase is executed, and notifies
 272     the Logical Unit about the hooks' result. The LU can then use it to alter
 273     its result based on the hooks.  By default the method does nothing and the
 274     previous result is passed back unchanged but any LU can define it if it
 275     wants to use the local cluster hook-scripts somehow.
 276
 277     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 278         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 279     @param hook_results: the results of the multi-node hooks rpc call
 280     @param feedback_fn: function used send feedback back to the caller
 281     @param lu_result: the previous Exec result this LU had, or None
 282         in the PRE phase
 283     @return: the new Exec result, based on the previous result
 284         and hook results
 285
 286     """
 287     # API must be kept, thus we ignore the unused argument and could
 288     # be a function warnings
 289     # pylint: disable-msg=W0613,R0201
 290     return lu_result
 291
 292   def _ExpandAndLockInstance(self):
 293     """Helper function to expand and lock an instance.
 294
 295     Many LUs that work on an instance take its name in self.op.instance_name
 296     and need to expand it and then declare the expanded name for locking. This
 297     function does it, and then updates self.op.instance_name to the expanded
 298     name. It also initializes needed_locks as a dict, if this hasn't been done
 299     before.
 300
 301     """
 302     if self.needed_locks is None:
 303       self.needed_locks = {}
 304     else:
 305       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 306         "_ExpandAndLockInstance called with instance-level locks set"
 307     expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
 308     if expanded_name is None:
 309       raise errors.OpPrereqError("Instance '%s' not known" %
 310                                  self.op.instance_name, errors.ECODE_NOENT)
 311     self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
 312     self.op.instance_name = expanded_name
 313
 314   def _LockInstancesNodes(self, primary_only=False):
 315     """Helper function to declare instances' nodes for locking.
 316
 317     This function should be called after locking one or more instances to lock
 318     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 319     with all primary or secondary nodes for instances already locked and
 320     present in self.needed_locks[locking.LEVEL_INSTANCE].
 321
 322     It should be called from DeclareLocks, and for safety only works if
 323     self.recalculate_locks[locking.LEVEL_NODE] is set.
 324
 325     In the future it may grow parameters to just lock some instance's nodes, or
 326     to just lock primaries or secondary nodes, if needed.
 327
 328     If should be called in DeclareLocks in a way similar to::
 329
 330       if level == locking.LEVEL_NODE:
 331         self._LockInstancesNodes()
 332
 333     @type primary_only: boolean
 334     @param primary_only: only lock primary nodes of locked instances
 335
 336     """
 337     assert locking.LEVEL_NODE in self.recalculate_locks, \
 338       "_LockInstancesNodes helper function called with no nodes to recalculate"
 339
 340     # TODO: check if we're really been called with the instance locks held
 341
 342     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 343     # future we might want to have different behaviors depending on the value
 344     # of self.recalculate_locks[locking.LEVEL_NODE]
 345     wanted_nodes = []
 346     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 347       instance = self.context.cfg.GetInstanceInfo(instance_name)
 348       wanted_nodes.append(instance.primary_node)
 349       if not primary_only:
 350         wanted_nodes.extend(instance.secondary_nodes)
 351
 352     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 353       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 354     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 355       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 356
 357     del self.recalculate_locks[locking.LEVEL_NODE]
 358
 359
 360 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 361   """Simple LU which runs no hooks.
 362
 363   This LU is intended as a parent for other LogicalUnits which will
 364   run no hooks, in order to reduce duplicate code.
 365
 366   """
 367   HPATH = None
 368   HTYPE = None
 369
 370   def BuildHooksEnv(self):
 371     """Empty BuildHooksEnv for NoHooksLu.
 372
 373     This just raises an error.
 374
 375     """
 376     assert False, "BuildHooksEnv called for NoHooksLUs"
 377
 378
 379 class Tasklet:
 380   """Tasklet base class.
 381
 382   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 383   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 384   tasklets know nothing about locks.
 385
 386   Subclasses must follow these rules:
 387     - Implement CheckPrereq
 388     - Implement Exec
 389
 390   """
 391   def __init__(self, lu):
 392     self.lu = lu
 393
 394     # Shortcuts
 395     self.cfg = lu.cfg
 396     self.rpc = lu.rpc
 397
 398   def CheckPrereq(self):
 399     """Check prerequisites for this tasklets.
 400
 401     This method should check whether the prerequisites for the execution of
 402     this tasklet are fulfilled. It can do internode communication, but it
 403     should be idempotent - no cluster or system changes are allowed.
 404
 405     The method should raise errors.OpPrereqError in case something is not
 406     fulfilled. Its return value is ignored.
 407
 408     This method should also update all parameters to their canonical form if it
 409     hasn't been done before.
 410
 411     """
 412     raise NotImplementedError
 413
 414   def Exec(self, feedback_fn):
 415     """Execute the tasklet.
 416
 417     This method should implement the actual work. It should raise
 418     errors.OpExecError for failures that are somewhat dealt with in code, or
 419     expected.
 420
 421     """
 422     raise NotImplementedError
 423
 424
 425 def _GetWantedNodes(lu, nodes):
 426   """Returns list of checked and expanded node names.
 427
 428   @type lu: L{LogicalUnit}
 429   @param lu: the logical unit on whose behalf we execute
 430   @type nodes: list
 431   @param nodes: list of node names or None for all nodes
 432   @rtype: list
 433   @return: the list of nodes, sorted
 434   @raise errors.OpProgrammerError: if the nodes parameter is wrong type
 435
 436   """
 437   if not isinstance(nodes, list):
 438     raise errors.OpPrereqError("Invalid argument type 'nodes'",
 439                                errors.ECODE_INVAL)
 440
 441   if not nodes:
 442     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 443       " non-empty list of nodes whose name is to be expanded.")
 444
 445   wanted = []
 446   for name in nodes:
 447     node = lu.cfg.ExpandNodeName(name)
 448     if node is None:
 449       raise errors.OpPrereqError("No such node name '%s'" % name,
 450                                  errors.ECODE_NOENT)
 451     wanted.append(node)
 452
 453   return utils.NiceSort(wanted)
 454
 455
 456 def _GetWantedInstances(lu, instances):
 457   """Returns list of checked and expanded instance names.
 458
 459   @type lu: L{LogicalUnit}
 460   @param lu: the logical unit on whose behalf we execute
 461   @type instances: list
 462   @param instances: list of instance names or None for all instances
 463   @rtype: list
 464   @return: the list of instances, sorted
 465   @raise errors.OpPrereqError: if the instances parameter is wrong type
 466   @raise errors.OpPrereqError: if any of the passed instances is not found
 467
 468   """
 469   if not isinstance(instances, list):
 470     raise errors.OpPrereqError("Invalid argument type 'instances'",
 471                                errors.ECODE_INVAL)
 472
 473   if instances:
 474     wanted = []
 475
 476     for name in instances:
 477       instance = lu.cfg.ExpandInstanceName(name)
 478       if instance is None:
 479         raise errors.OpPrereqError("No such instance name '%s'" % name,
 480                                    errors.ECODE_NOENT)
 481       wanted.append(instance)
 482
 483   else:
 484     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 485   return wanted
 486
 487
 488 def _CheckOutputFields(static, dynamic, selected):
 489   """Checks whether all selected fields are valid.
 490
 491   @type static: L{utils.FieldSet}
 492   @param static: static fields set
 493   @type dynamic: L{utils.FieldSet}
 494   @param dynamic: dynamic fields set
 495
 496   """
 497   f = utils.FieldSet()
 498   f.Extend(static)
 499   f.Extend(dynamic)
 500
 501   delta = f.NonMatching(selected)
 502   if delta:
 503     raise errors.OpPrereqError("Unknown output fields selected: %s"
 504                                % ",".join(delta), errors.ECODE_INVAL)
 505
 506
 507 def _CheckBooleanOpField(op, name):
 508   """Validates boolean opcode parameters.
 509
 510   This will ensure that an opcode parameter is either a boolean value,
 511   or None (but that it always exists).
 512
 513   """
 514   val = getattr(op, name, None)
 515   if not (val is None or isinstance(val, bool)):
 516     raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
 517                                (name, str(val)), errors.ECODE_INVAL)
 518   setattr(op, name, val)
 519
 520
 521 def _CheckGlobalHvParams(params):
 522   """Validates that given hypervisor params are not global ones.
 523
 524   This will ensure that instances don't get customised versions of
 525   global params.
 526
 527   """
 528   used_globals = constants.HVC_GLOBALS.intersection(params)
 529   if used_globals:
 530     msg = ("The following hypervisor parameters are global and cannot"
 531            " be customized at instance level, please modify them at"
 532            " cluster level: %s" % utils.CommaJoin(used_globals))
 533     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 534
 535
 536 def _CheckNodeOnline(lu, node):
 537   """Ensure that a given node is online.
 538
 539   @param lu: the LU on behalf of which we make the check
 540   @param node: the node to check
 541   @raise errors.OpPrereqError: if the node is offline
 542
 543   """
 544   if lu.cfg.GetNodeInfo(node).offline:
 545     raise errors.OpPrereqError("Can't use offline node %s" % node,
 546                                errors.ECODE_INVAL)
 547
 548
 549 def _CheckNodeNotDrained(lu, node):
 550   """Ensure that a given node is not drained.
 551
 552   @param lu: the LU on behalf of which we make the check
 553   @param node: the node to check
 554   @raise errors.OpPrereqError: if the node is drained
 555
 556   """
 557   if lu.cfg.GetNodeInfo(node).drained:
 558     raise errors.OpPrereqError("Can't use drained node %s" % node,
 559                                errors.ECODE_INVAL)
 560
 561
 562 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 563                           memory, vcpus, nics, disk_template, disks,
 564                           bep, hvp, hypervisor_name):
 565   """Builds instance related env variables for hooks
 566
 567   This builds the hook environment from individual variables.
 568
 569   @type name: string
 570   @param name: the name of the instance
 571   @type primary_node: string
 572   @param primary_node: the name of the instance's primary node
 573   @type secondary_nodes: list
 574   @param secondary_nodes: list of secondary nodes as strings
 575   @type os_type: string
 576   @param os_type: the name of the instance's OS
 577   @type status: boolean
 578   @param status: the should_run status of the instance
 579   @type memory: string
 580   @param memory: the memory size of the instance
 581   @type vcpus: string
 582   @param vcpus: the count of VCPUs the instance has
 583   @type nics: list
 584   @param nics: list of tuples (ip, mac, mode, link) representing
 585       the NICs the instance has
 586   @type disk_template: string
 587   @param disk_template: the disk template of the instance
 588   @type disks: list
 589   @param disks: the list of (size, mode) pairs
 590   @type bep: dict
 591   @param bep: the backend parameters for the instance
 592   @type hvp: dict
 593   @param hvp: the hypervisor parameters for the instance
 594   @type hypervisor_name: string
 595   @param hypervisor_name: the hypervisor for the instance
 596   @rtype: dict
 597   @return: the hook environment for this instance
 598
 599   """
 600   if status:
 601     str_status = "up"
 602   else:
 603     str_status = "down"
 604   env = {
 605     "OP_TARGET": name,
 606     "INSTANCE_NAME": name,
 607     "INSTANCE_PRIMARY": primary_node,
 608     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 609     "INSTANCE_OS_TYPE": os_type,
 610     "INSTANCE_STATUS": str_status,
 611     "INSTANCE_MEMORY": memory,
 612     "INSTANCE_VCPUS": vcpus,
 613     "INSTANCE_DISK_TEMPLATE": disk_template,
 614     "INSTANCE_HYPERVISOR": hypervisor_name,
 615   }
 616
 617   if nics:
 618     nic_count = len(nics)
 619     for idx, (ip, mac, mode, link) in enumerate(nics):
 620       if ip is None:
 621         ip = ""
 622       env["INSTANCE_NIC%d_IP" % idx] = ip
 623       env["INSTANCE_NIC%d_MAC" % idx] = mac
 624       env["INSTANCE_NIC%d_MODE" % idx] = mode
 625       env["INSTANCE_NIC%d_LINK" % idx] = link
 626       if mode == constants.NIC_MODE_BRIDGED:
 627         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 628   else:
 629     nic_count = 0
 630
 631   env["INSTANCE_NIC_COUNT"] = nic_count
 632
 633   if disks:
 634     disk_count = len(disks)
 635     for idx, (size, mode) in enumerate(disks):
 636       env["INSTANCE_DISK%d_SIZE" % idx] = size
 637       env["INSTANCE_DISK%d_MODE" % idx] = mode
 638   else:
 639     disk_count = 0
 640
 641   env["INSTANCE_DISK_COUNT"] = disk_count
 642
 643   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 644     for key, value in source.items():
 645       env["INSTANCE_%s_%s" % (kind, key)] = value
 646
 647   return env
 648
 649
 650 def _NICListToTuple(lu, nics):
 651   """Build a list of nic information tuples.
 652
 653   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 654   value in LUQueryInstanceData.
 655
 656   @type lu:  L{LogicalUnit}
 657   @param lu: the logical unit on whose behalf we execute
 658   @type nics: list of L{objects.NIC}
 659   @param nics: list of nics to convert to hooks tuples
 660
 661   """
 662   hooks_nics = []
 663   c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
 664   for nic in nics:
 665     ip = nic.ip
 666     mac = nic.mac
 667     filled_params = objects.FillDict(c_nicparams, nic.nicparams)
 668     mode = filled_params[constants.NIC_MODE]
 669     link = filled_params[constants.NIC_LINK]
 670     hooks_nics.append((ip, mac, mode, link))
 671   return hooks_nics
 672
 673
 674 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 675   """Builds instance related env variables for hooks from an object.
 676
 677   @type lu: L{LogicalUnit}
 678   @param lu: the logical unit on whose behalf we execute
 679   @type instance: L{objects.Instance}
 680   @param instance: the instance for which we should build the
 681       environment
 682   @type override: dict
 683   @param override: dictionary with key/values that will override
 684       our values
 685   @rtype: dict
 686   @return: the hook environment dictionary
 687
 688   """
 689   cluster = lu.cfg.GetClusterInfo()
 690   bep = cluster.FillBE(instance)
 691   hvp = cluster.FillHV(instance)
 692   args = {
 693     'name': instance.name,
 694     'primary_node': instance.primary_node,
 695     'secondary_nodes': instance.secondary_nodes,
 696     'os_type': instance.os,
 697     'status': instance.admin_up,
 698     'memory': bep[constants.BE_MEMORY],
 699     'vcpus': bep[constants.BE_VCPUS],
 700     'nics': _NICListToTuple(lu, instance.nics),
 701     'disk_template': instance.disk_template,
 702     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 703     'bep': bep,
 704     'hvp': hvp,
 705     'hypervisor_name': instance.hypervisor,
 706   }
 707   if override:
 708     args.update(override)
 709   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 710
 711
 712 def _AdjustCandidatePool(lu, exceptions):
 713   """Adjust the candidate pool after node operations.
 714
 715   """
 716   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 717   if mod_list:
 718     lu.LogInfo("Promoted nodes to master candidate role: %s",
 719                utils.CommaJoin(node.name for node in mod_list))
 720     for name in mod_list:
 721       lu.context.ReaddNode(name)
 722   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 723   if mc_now > mc_max:
 724     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 725                (mc_now, mc_max))
 726
 727
 728 def _DecideSelfPromotion(lu, exceptions=None):
 729   """Decide whether I should promote myself as a master candidate.
 730
 731   """
 732   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 733   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 734   # the new node will increase mc_max with one, so:
 735   mc_should = min(mc_should + 1, cp_size)
 736   return mc_now < mc_should
 737
 738
 739 def _CheckNicsBridgesExist(lu, target_nics, target_node,
 740                                profile=constants.PP_DEFAULT):
 741   """Check that the brigdes needed by a list of nics exist.
 742
 743   """
 744   c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
 745   paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
 746                 for nic in target_nics]
 747   brlist = [params[constants.NIC_LINK] for params in paramslist
 748             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 749   if brlist:
 750     result = lu.rpc.call_bridges_exist(target_node, brlist)
 751     result.Raise("Error checking bridges on destination node '%s'" %
 752                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 753
 754
 755 def _CheckInstanceBridgesExist(lu, instance, node=None):
 756   """Check that the brigdes needed by an instance exist.
 757
 758   """
 759   if node is None:
 760     node = instance.primary_node
 761   _CheckNicsBridgesExist(lu, instance.nics, node)
 762
 763
 764 def _CheckOSVariant(os_obj, name):
 765   """Check whether an OS name conforms to the os variants specification.
 766
 767   @type os_obj: L{objects.OS}
 768   @param os_obj: OS object to check
 769   @type name: string
 770   @param name: OS name passed by the user, to check for validity
 771
 772   """
 773   if not os_obj.supported_variants:
 774     return
 775   try:
 776     variant = name.split("+", 1)[1]
 777   except IndexError:
 778     raise errors.OpPrereqError("OS name must include a variant",
 779                                errors.ECODE_INVAL)
 780
 781   if variant not in os_obj.supported_variants:
 782     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
 783
 784
 785 def _GetNodeInstancesInner(cfg, fn):
 786   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
 787
 788
 789 def _GetNodeInstances(cfg, node_name):
 790   """Returns a list of all primary and secondary instances on a node.
 791
 792   """
 793
 794   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
 795
 796
 797 def _GetNodePrimaryInstances(cfg, node_name):
 798   """Returns primary instances on a node.
 799
 800   """
 801   return _GetNodeInstancesInner(cfg,
 802                                 lambda inst: node_name == inst.primary_node)
 803
 804
 805 def _GetNodeSecondaryInstances(cfg, node_name):
 806   """Returns secondary instances on a node.
 807
 808   """
 809   return _GetNodeInstancesInner(cfg,
 810                                 lambda inst: node_name in inst.secondary_nodes)
 811
 812
 813 def _GetStorageTypeArgs(cfg, storage_type):
 814   """Returns the arguments for a storage type.
 815
 816   """
 817   # Special case for file storage
 818   if storage_type == constants.ST_FILE:
 819     # storage.FileStorage wants a list of storage directories
 820     return [[cfg.GetFileStorageDir()]]
 821
 822   return []
 823
 824
 825 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
 826   faulty = []
 827
 828   for dev in instance.disks:
 829     cfg.SetDiskID(dev, node_name)
 830
 831   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
 832   result.Raise("Failed to get disk status from node %s" % node_name,
 833                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 834
 835   for idx, bdev_status in enumerate(result.payload):
 836     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
 837       faulty.append(idx)
 838
 839   return faulty
 840
 841
 842 class LUPostInitCluster(LogicalUnit):
 843   """Logical unit for running hooks after cluster initialization.
 844
 845   """
 846   HPATH = "cluster-init"
 847   HTYPE = constants.HTYPE_CLUSTER
 848   _OP_REQP = []
 849
 850   def BuildHooksEnv(self):
 851     """Build hooks env.
 852
 853     """
 854     env = {"OP_TARGET": self.cfg.GetClusterName()}
 855     mn = self.cfg.GetMasterNode()
 856     return env, [], [mn]
 857
 858   def CheckPrereq(self):
 859     """No prerequisites to check.
 860
 861     """
 862     return True
 863
 864   def Exec(self, feedback_fn):
 865     """Nothing to do.
 866
 867     """
 868     return True
 869
 870
 871 class LUDestroyCluster(LogicalUnit):
 872   """Logical unit for destroying the cluster.
 873
 874   """
 875   HPATH = "cluster-destroy"
 876   HTYPE = constants.HTYPE_CLUSTER
 877   _OP_REQP = []
 878
 879   def BuildHooksEnv(self):
 880     """Build hooks env.
 881
 882     """
 883     env = {"OP_TARGET": self.cfg.GetClusterName()}
 884     return env, [], []
 885
 886   def CheckPrereq(self):
 887     """Check prerequisites.
 888
 889     This checks whether the cluster is empty.
 890
 891     Any errors are signaled by raising errors.OpPrereqError.
 892
 893     """
 894     master = self.cfg.GetMasterNode()
 895
 896     nodelist = self.cfg.GetNodeList()
 897     if len(nodelist) != 1 or nodelist[0] != master:
 898       raise errors.OpPrereqError("There are still %d node(s) in"
 899                                  " this cluster." % (len(nodelist) - 1),
 900                                  errors.ECODE_INVAL)
 901     instancelist = self.cfg.GetInstanceList()
 902     if instancelist:
 903       raise errors.OpPrereqError("There are still %d instance(s) in"
 904                                  " this cluster." % len(instancelist),
 905                                  errors.ECODE_INVAL)
 906
 907   def Exec(self, feedback_fn):
 908     """Destroys the cluster.
 909
 910     """
 911     master = self.cfg.GetMasterNode()
 912     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
 913
 914     # Run post hooks on master node before it's removed
 915     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
 916     try:
 917       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
 918     except:
 919       # pylint: disable-msg=W0702
 920       self.LogWarning("Errors occurred running hooks on %s" % master)
 921
 922     result = self.rpc.call_node_stop_master(master, False)
 923     result.Raise("Could not disable the master role")
 924
 925     if modify_ssh_setup:
 926       priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
 927       utils.CreateBackup(priv_key)
 928       utils.CreateBackup(pub_key)
 929
 930     return master
 931
 932
 933 class LUVerifyCluster(LogicalUnit):
 934   """Verifies the cluster status.
 935
 936   """
 937   HPATH = "cluster-verify"
 938   HTYPE = constants.HTYPE_CLUSTER
 939   _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
 940   REQ_BGL = False
 941
 942   TCLUSTER = "cluster"
 943   TNODE = "node"
 944   TINSTANCE = "instance"
 945
 946   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
 947   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
 948   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
 949   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
 950   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
 951   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
 952   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
 953   ENODEDRBD = (TNODE, "ENODEDRBD")
 954   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
 955   ENODEHOOKS = (TNODE, "ENODEHOOKS")
 956   ENODEHV = (TNODE, "ENODEHV")
 957   ENODELVM = (TNODE, "ENODELVM")
 958   ENODEN1 = (TNODE, "ENODEN1")
 959   ENODENET = (TNODE, "ENODENET")
 960   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
 961   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
 962   ENODERPC = (TNODE, "ENODERPC")
 963   ENODESSH = (TNODE, "ENODESSH")
 964   ENODEVERSION = (TNODE, "ENODEVERSION")
 965   ENODESETUP = (TNODE, "ENODESETUP")
 966   ENODETIME = (TNODE, "ENODETIME")
 967
 968   ETYPE_FIELD = "code"
 969   ETYPE_ERROR = "ERROR"
 970   ETYPE_WARNING = "WARNING"
 971
 972   def ExpandNames(self):
 973     self.needed_locks = {
 974       locking.LEVEL_NODE: locking.ALL_SET,
 975       locking.LEVEL_INSTANCE: locking.ALL_SET,
 976     }
 977     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
 978
 979   def _Error(self, ecode, item, msg, *args, **kwargs):
 980     """Format an error message.
 981
 982     Based on the opcode's error_codes parameter, either format a
 983     parseable error code, or a simpler error string.
 984
 985     This must be called only from Exec and functions called from Exec.
 986
 987     """
 988     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
 989     itype, etxt = ecode
 990     # first complete the msg
 991     if args:
 992       msg = msg % args
 993     # then format the whole message
 994     if self.op.error_codes:
 995       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
 996     else:
 997       if item:
 998         item = " " + item
 999       else:
1000         item = ""
1001       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1002     # and finally report it via the feedback_fn
1003     self._feedback_fn("  - %s" % msg)
1004
1005   def _ErrorIf(self, cond, *args, **kwargs):
1006     """Log an error message if the passed condition is True.
1007
1008     """
1009     cond = bool(cond) or self.op.debug_simulate_errors
1010     if cond:
1011       self._Error(*args, **kwargs)
1012     # do not mark the operation as failed for WARN cases only
1013     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1014       self.bad = self.bad or cond
1015
1016   def _VerifyNode(self, nodeinfo, file_list, local_cksum,
1017                   node_result, master_files, drbd_map, vg_name):
1018     """Run multiple tests against a node.
1019
1020     Test list:
1021
1022       - compares ganeti version
1023       - checks vg existence and size > 20G
1024       - checks config file checksum
1025       - checks ssh to other nodes
1026
1027     @type nodeinfo: L{objects.Node}
1028     @param nodeinfo: the node to check
1029     @param file_list: required list of files
1030     @param local_cksum: dictionary of local files and their checksums
1031     @param node_result: the results from the node
1032     @param master_files: list of files that only masters should have
1033     @param drbd_map: the useddrbd minors for this node, in
1034         form of minor: (instance, must_exist) which correspond to instances
1035         and their running status
1036     @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
1037
1038     """
1039     node = nodeinfo.name
1040     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1041
1042     # main result, node_result should be a non-empty dict
1043     test = not node_result or not isinstance(node_result, dict)
1044     _ErrorIf(test, self.ENODERPC, node,
1045                   "unable to verify node: no data returned")
1046     if test:
1047       return
1048
1049     # compares ganeti version
1050     local_version = constants.PROTOCOL_VERSION
1051     remote_version = node_result.get('version', None)
1052     test = not (remote_version and
1053                 isinstance(remote_version, (list, tuple)) and
1054                 len(remote_version) == 2)
1055     _ErrorIf(test, self.ENODERPC, node,
1056              "connection to node returned invalid data")
1057     if test:
1058       return
1059
1060     test = local_version != remote_version[0]
1061     _ErrorIf(test, self.ENODEVERSION, node,
1062              "incompatible protocol versions: master %s,"
1063              " node %s", local_version, remote_version[0])
1064     if test:
1065       return
1066
1067     # node seems compatible, we can actually try to look into its results
1068
1069     # full package version
1070     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1071                   self.ENODEVERSION, node,
1072                   "software version mismatch: master %s, node %s",
1073                   constants.RELEASE_VERSION, remote_version[1],
1074                   code=self.ETYPE_WARNING)
1075
1076     # checks vg existence and size > 20G
1077     if vg_name is not None:
1078       vglist = node_result.get(constants.NV_VGLIST, None)
1079       test = not vglist
1080       _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1081       if not test:
1082         vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1083                                               constants.MIN_VG_SIZE)
1084         _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1085
1086     # checks config file checksum
1087
1088     remote_cksum = node_result.get(constants.NV_FILELIST, None)
1089     test = not isinstance(remote_cksum, dict)
1090     _ErrorIf(test, self.ENODEFILECHECK, node,
1091              "node hasn't returned file checksum data")
1092     if not test:
1093       for file_name in file_list:
1094         node_is_mc = nodeinfo.master_candidate
1095         must_have = (file_name not in master_files) or node_is_mc
1096         # missing
1097         test1 = file_name not in remote_cksum
1098         # invalid checksum
1099         test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1100         # existing and good
1101         test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1102         _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1103                  "file '%s' missing", file_name)
1104         _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1105                  "file '%s' has wrong checksum", file_name)
1106         # not candidate and this is not a must-have file
1107         _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1108                  "file '%s' should not exist on non master"
1109                  " candidates (and the file is outdated)", file_name)
1110         # all good, except non-master/non-must have combination
1111         _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1112                  "file '%s' should not exist"
1113                  " on non master candidates", file_name)
1114
1115     # checks ssh to any
1116
1117     test = constants.NV_NODELIST not in node_result
1118     _ErrorIf(test, self.ENODESSH, node,
1119              "node hasn't returned node ssh connectivity data")
1120     if not test:
1121       if node_result[constants.NV_NODELIST]:
1122         for a_node, a_msg in node_result[constants.NV_NODELIST].items():
1123           _ErrorIf(True, self.ENODESSH, node,
1124                    "ssh communication with node '%s': %s", a_node, a_msg)
1125
1126     test = constants.NV_NODENETTEST not in node_result
1127     _ErrorIf(test, self.ENODENET, node,
1128              "node hasn't returned node tcp connectivity data")
1129     if not test:
1130       if node_result[constants.NV_NODENETTEST]:
1131         nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
1132         for anode in nlist:
1133           _ErrorIf(True, self.ENODENET, node,
1134                    "tcp communication with node '%s': %s",
1135                    anode, node_result[constants.NV_NODENETTEST][anode])
1136
1137     hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
1138     if isinstance(hyp_result, dict):
1139       for hv_name, hv_result in hyp_result.iteritems():
1140         test = hv_result is not None
1141         _ErrorIf(test, self.ENODEHV, node,
1142                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1143
1144     # check used drbd list
1145     if vg_name is not None:
1146       used_minors = node_result.get(constants.NV_DRBDLIST, [])
1147       test = not isinstance(used_minors, (tuple, list))
1148       _ErrorIf(test, self.ENODEDRBD, node,
1149                "cannot parse drbd status file: %s", str(used_minors))
1150       if not test:
1151         for minor, (iname, must_exist) in drbd_map.items():
1152           test = minor not in used_minors and must_exist
1153           _ErrorIf(test, self.ENODEDRBD, node,
1154                    "drbd minor %d of instance %s is not active",
1155                    minor, iname)
1156         for minor in used_minors:
1157           test = minor not in drbd_map
1158           _ErrorIf(test, self.ENODEDRBD, node,
1159                    "unallocated drbd minor %d is in use", minor)
1160     test = node_result.get(constants.NV_NODESETUP,
1161                            ["Missing NODESETUP results"])
1162     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1163              "; ".join(test))
1164
1165     # check pv names
1166     if vg_name is not None:
1167       pvlist = node_result.get(constants.NV_PVLIST, None)
1168       test = pvlist is None
1169       _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1170       if not test:
1171         # check that ':' is not present in PV names, since it's a
1172         # special character for lvcreate (denotes the range of PEs to
1173         # use on the PV)
1174         for _, pvname, owner_vg in pvlist:
1175           test = ":" in pvname
1176           _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1177                    " '%s' of VG '%s'", pvname, owner_vg)
1178
1179   def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
1180                       node_instance, n_offline):
1181     """Verify an instance.
1182
1183     This function checks to see if the required block devices are
1184     available on the instance's node.
1185
1186     """
1187     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1188     node_current = instanceconfig.primary_node
1189
1190     node_vol_should = {}
1191     instanceconfig.MapLVsByNode(node_vol_should)
1192
1193     for node in node_vol_should:
1194       if node in n_offline:
1195         # ignore missing volumes on offline nodes
1196         continue
1197       for volume in node_vol_should[node]:
1198         test = node not in node_vol_is or volume not in node_vol_is[node]
1199         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1200                  "volume %s missing on node %s", volume, node)
1201
1202     if instanceconfig.admin_up:
1203       test = ((node_current not in node_instance or
1204                not instance in node_instance[node_current]) and
1205               node_current not in n_offline)
1206       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1207                "instance not running on its primary node %s",
1208                node_current)
1209
1210     for node in node_instance:
1211       if (not node == node_current):
1212         test = instance in node_instance[node]
1213         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1214                  "instance should not run on node %s", node)
1215
1216   def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
1217     """Verify if there are any unknown volumes in the cluster.
1218
1219     The .os, .swap and backup volumes are ignored. All other volumes are
1220     reported as unknown.
1221
1222     """
1223     for node in node_vol_is:
1224       for volume in node_vol_is[node]:
1225         test = (node not in node_vol_should or
1226                 volume not in node_vol_should[node])
1227         self._ErrorIf(test, self.ENODEORPHANLV, node,
1228                       "volume %s is unknown", volume)
1229
1230   def _VerifyOrphanInstances(self, instancelist, node_instance):
1231     """Verify the list of running instances.
1232
1233     This checks what instances are running but unknown to the cluster.
1234
1235     """
1236     for node in node_instance:
1237       for o_inst in node_instance[node]:
1238         test = o_inst not in instancelist
1239         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1240                       "instance %s on node %s should not exist", o_inst, node)
1241
1242   def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
1243     """Verify N+1 Memory Resilience.
1244
1245     Check that if one single node dies we can still start all the instances it
1246     was primary for.
1247
1248     """
1249     for node, nodeinfo in node_info.iteritems():
1250       # This code checks that every node which is now listed as secondary has
1251       # enough memory to host all instances it is supposed to should a single
1252       # other node in the cluster fail.
1253       # FIXME: not ready for failover to an arbitrary node
1254       # FIXME: does not support file-backed instances
1255       # WARNING: we currently take into account down instances as well as up
1256       # ones, considering that even if they're down someone might want to start
1257       # them even in the event of a node failure.
1258       for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1259         needed_mem = 0
1260         for instance in instances:
1261           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1262           if bep[constants.BE_AUTO_BALANCE]:
1263             needed_mem += bep[constants.BE_MEMORY]
1264         test = nodeinfo['mfree'] < needed_mem
1265         self._ErrorIf(test, self.ENODEN1, node,
1266                       "not enough memory on to accommodate"
1267                       " failovers should peer node %s fail", prinode)
1268
1269   def CheckPrereq(self):
1270     """Check prerequisites.
1271
1272     Transform the list of checks we're going to skip into a set and check that
1273     all its members are valid.
1274
1275     """
1276     self.skip_set = frozenset(self.op.skip_checks)
1277     if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1278       raise errors.OpPrereqError("Invalid checks to be skipped specified",
1279                                  errors.ECODE_INVAL)
1280
1281   def BuildHooksEnv(self):
1282     """Build hooks env.
1283
1284     Cluster-Verify hooks just ran in the post phase and their failure makes
1285     the output be logged in the verify output and the verification to fail.
1286
1287     """
1288     all_nodes = self.cfg.GetNodeList()
1289     env = {
1290       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1291       }
1292     for node in self.cfg.GetAllNodesInfo().values():
1293       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1294
1295     return env, [], all_nodes
1296
1297   def Exec(self, feedback_fn):
1298     """Verify integrity of cluster, performing various test on nodes.
1299
1300     """
1301     self.bad = False
1302     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1303     verbose = self.op.verbose
1304     self._feedback_fn = feedback_fn
1305     feedback_fn("* Verifying global settings")
1306     for msg in self.cfg.VerifyConfig():
1307       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1308
1309     vg_name = self.cfg.GetVGName()
1310     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1311     nodelist = utils.NiceSort(self.cfg.GetNodeList())
1312     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1313     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1314     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1315                         for iname in instancelist)
1316     i_non_redundant = [] # Non redundant instances
1317     i_non_a_balanced = [] # Non auto-balanced instances
1318     n_offline = [] # List of offline nodes
1319     n_drained = [] # List of nodes being drained
1320     node_volume = {}
1321     node_instance = {}
1322     node_info = {}
1323     instance_cfg = {}
1324
1325     # FIXME: verify OS list
1326     # do local checksums
1327     master_files = [constants.CLUSTER_CONF_FILE]
1328
1329     file_names = ssconf.SimpleStore().GetFileList()
1330     file_names.append(constants.SSL_CERT_FILE)
1331     file_names.append(constants.RAPI_CERT_FILE)
1332     file_names.extend(master_files)
1333
1334     local_checksums = utils.FingerprintFiles(file_names)
1335
1336     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1337     node_verify_param = {
1338       constants.NV_FILELIST: file_names,
1339       constants.NV_NODELIST: [node.name for node in nodeinfo
1340                               if not node.offline],
1341       constants.NV_HYPERVISOR: hypervisors,
1342       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1343                                   node.secondary_ip) for node in nodeinfo
1344                                  if not node.offline],
1345       constants.NV_INSTANCELIST: hypervisors,
1346       constants.NV_VERSION: None,
1347       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1348       constants.NV_NODESETUP: None,
1349       constants.NV_TIME: None,
1350       }
1351
1352     if vg_name is not None:
1353       node_verify_param[constants.NV_VGLIST] = None
1354       node_verify_param[constants.NV_LVLIST] = vg_name
1355       node_verify_param[constants.NV_PVLIST] = [vg_name]
1356       node_verify_param[constants.NV_DRBDLIST] = None
1357
1358     # Due to the way our RPC system works, exact response times cannot be
1359     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1360     # time before and after executing the request, we can at least have a time
1361     # window.
1362     nvinfo_starttime = time.time()
1363     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1364                                            self.cfg.GetClusterName())
1365     nvinfo_endtime = time.time()
1366
1367     cluster = self.cfg.GetClusterInfo()
1368     master_node = self.cfg.GetMasterNode()
1369     all_drbd_map = self.cfg.ComputeDRBDMap()
1370
1371     feedback_fn("* Verifying node status")
1372     for node_i in nodeinfo:
1373       node = node_i.name
1374
1375       if node_i.offline:
1376         if verbose:
1377           feedback_fn("* Skipping offline node %s" % (node,))
1378         n_offline.append(node)
1379         continue
1380
1381       if node == master_node:
1382         ntype = "master"
1383       elif node_i.master_candidate:
1384         ntype = "master candidate"
1385       elif node_i.drained:
1386         ntype = "drained"
1387         n_drained.append(node)
1388       else:
1389         ntype = "regular"
1390       if verbose:
1391         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1392
1393       msg = all_nvinfo[node].fail_msg
1394       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1395       if msg:
1396         continue
1397
1398       nresult = all_nvinfo[node].payload
1399       node_drbd = {}
1400       for minor, instance in all_drbd_map[node].items():
1401         test = instance not in instanceinfo
1402         _ErrorIf(test, self.ECLUSTERCFG, None,
1403                  "ghost instance '%s' in temporary DRBD map", instance)
1404           # ghost instance should not be running, but otherwise we
1405           # don't give double warnings (both ghost instance and
1406           # unallocated minor in use)
1407         if test:
1408           node_drbd[minor] = (instance, False)
1409         else:
1410           instance = instanceinfo[instance]
1411           node_drbd[minor] = (instance.name, instance.admin_up)
1412
1413       self._VerifyNode(node_i, file_names, local_checksums,
1414                        nresult, master_files, node_drbd, vg_name)
1415
1416       lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1417       if vg_name is None:
1418         node_volume[node] = {}
1419       elif isinstance(lvdata, basestring):
1420         _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1421                  utils.SafeEncode(lvdata))
1422         node_volume[node] = {}
1423       elif not isinstance(lvdata, dict):
1424         _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1425         continue
1426       else:
1427         node_volume[node] = lvdata
1428
1429       # node_instance
1430       idata = nresult.get(constants.NV_INSTANCELIST, None)
1431       test = not isinstance(idata, list)
1432       _ErrorIf(test, self.ENODEHV, node,
1433                "rpc call to node failed (instancelist)")
1434       if test:
1435         continue
1436
1437       node_instance[node] = idata
1438
1439       # node_info
1440       nodeinfo = nresult.get(constants.NV_HVINFO, None)
1441       test = not isinstance(nodeinfo, dict)
1442       _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1443       if test:
1444         continue
1445
1446       # Node time
1447       ntime = nresult.get(constants.NV_TIME, None)
1448       try:
1449         ntime_merged = utils.MergeTime(ntime)
1450       except (ValueError, TypeError):
1451         _ErrorIf(test, self.ENODETIME, node, "Node returned invalid time")
1452
1453       if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1454         ntime_diff = abs(nvinfo_starttime - ntime_merged)
1455       elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1456         ntime_diff = abs(ntime_merged - nvinfo_endtime)
1457       else:
1458         ntime_diff = None
1459
1460       _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1461                "Node time diverges by at least %0.1fs from master node time",
1462                ntime_diff)
1463
1464       if ntime_diff is not None:
1465         continue
1466
1467       try:
1468         node_info[node] = {
1469           "mfree": int(nodeinfo['memory_free']),
1470           "pinst": [],
1471           "sinst": [],
1472           # dictionary holding all instances this node is secondary for,
1473           # grouped by their primary node. Each key is a cluster node, and each
1474           # value is a list of instances which have the key as primary and the
1475           # current node as secondary.  this is handy to calculate N+1 memory
1476           # availability if you can only failover from a primary to its
1477           # secondary.
1478           "sinst-by-pnode": {},
1479         }
1480         # FIXME: devise a free space model for file based instances as well
1481         if vg_name is not None:
1482           test = (constants.NV_VGLIST not in nresult or
1483                   vg_name not in nresult[constants.NV_VGLIST])
1484           _ErrorIf(test, self.ENODELVM, node,
1485                    "node didn't return data for the volume group '%s'"
1486                    " - it is either missing or broken", vg_name)
1487           if test:
1488             continue
1489           node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1490       except (ValueError, KeyError):
1491         _ErrorIf(True, self.ENODERPC, node,
1492                  "node returned invalid nodeinfo, check lvm/hypervisor")
1493         continue
1494
1495     node_vol_should = {}
1496
1497     feedback_fn("* Verifying instance status")
1498     for instance in instancelist:
1499       if verbose:
1500         feedback_fn("* Verifying instance %s" % instance)
1501       inst_config = instanceinfo[instance]
1502       self._VerifyInstance(instance, inst_config, node_volume,
1503                            node_instance, n_offline)
1504       inst_nodes_offline = []
1505
1506       inst_config.MapLVsByNode(node_vol_should)
1507
1508       instance_cfg[instance] = inst_config
1509
1510       pnode = inst_config.primary_node
1511       _ErrorIf(pnode not in node_info and pnode not in n_offline,
1512                self.ENODERPC, pnode, "instance %s, connection to"
1513                " primary node failed", instance)
1514       if pnode in node_info:
1515         node_info[pnode]['pinst'].append(instance)
1516
1517       if pnode in n_offline:
1518         inst_nodes_offline.append(pnode)
1519
1520       # If the instance is non-redundant we cannot survive losing its primary
1521       # node, so we are not N+1 compliant. On the other hand we have no disk
1522       # templates with more than one secondary so that situation is not well
1523       # supported either.
1524       # FIXME: does not support file-backed instances
1525       if len(inst_config.secondary_nodes) == 0:
1526         i_non_redundant.append(instance)
1527       _ErrorIf(len(inst_config.secondary_nodes) > 1,
1528                self.EINSTANCELAYOUT, instance,
1529                "instance has multiple secondary nodes", code="WARNING")
1530
1531       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1532         i_non_a_balanced.append(instance)
1533
1534       for snode in inst_config.secondary_nodes:
1535         _ErrorIf(snode not in node_info and snode not in n_offline,
1536                  self.ENODERPC, snode,
1537                  "instance %s, connection to secondary node"
1538                  "failed", instance)
1539
1540         if snode in node_info:
1541           node_info[snode]['sinst'].append(instance)
1542           if pnode not in node_info[snode]['sinst-by-pnode']:
1543             node_info[snode]['sinst-by-pnode'][pnode] = []
1544           node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1545
1546         if snode in n_offline:
1547           inst_nodes_offline.append(snode)
1548
1549       # warn that the instance lives on offline nodes
1550       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1551                "instance lives on offline node(s) %s",
1552                utils.CommaJoin(inst_nodes_offline))
1553
1554     feedback_fn("* Verifying orphan volumes")
1555     self._VerifyOrphanVolumes(node_vol_should, node_volume)
1556
1557     feedback_fn("* Verifying remaining instances")
1558     self._VerifyOrphanInstances(instancelist, node_instance)
1559
1560     if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1561       feedback_fn("* Verifying N+1 Memory redundancy")
1562       self._VerifyNPlusOneMemory(node_info, instance_cfg)
1563
1564     feedback_fn("* Other Notes")
1565     if i_non_redundant:
1566       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1567                   % len(i_non_redundant))
1568
1569     if i_non_a_balanced:
1570       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1571                   % len(i_non_a_balanced))
1572
1573     if n_offline:
1574       feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1575
1576     if n_drained:
1577       feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1578
1579     return not self.bad
1580
1581   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1582     """Analyze the post-hooks' result
1583
1584     This method analyses the hook result, handles it, and sends some
1585     nicely-formatted feedback back to the user.
1586
1587     @param phase: one of L{constants.HOOKS_PHASE_POST} or
1588         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1589     @param hooks_results: the results of the multi-node hooks rpc call
1590     @param feedback_fn: function used send feedback back to the caller
1591     @param lu_result: previous Exec result
1592     @return: the new Exec result, based on the previous result
1593         and hook results
1594
1595     """
1596     # We only really run POST phase hooks, and are only interested in
1597     # their results
1598     if phase == constants.HOOKS_PHASE_POST:
1599       # Used to change hooks' output to proper indentation
1600       indent_re = re.compile('^', re.M)
1601       feedback_fn("* Hooks Results")
1602       assert hooks_results, "invalid result from hooks"
1603
1604       for node_name in hooks_results:
1605         res = hooks_results[node_name]
1606         msg = res.fail_msg
1607         test = msg and not res.offline
1608         self._ErrorIf(test, self.ENODEHOOKS, node_name,
1609                       "Communication failure in hooks execution: %s", msg)
1610         if res.offline or msg:
1611           # No need to investigate payload if node is offline or gave an error.
1612           # override manually lu_result here as _ErrorIf only
1613           # overrides self.bad
1614           lu_result = 1
1615           continue
1616         for script, hkr, output in res.payload:
1617           test = hkr == constants.HKR_FAIL
1618           self._ErrorIf(test, self.ENODEHOOKS, node_name,
1619                         "Script %s failed, output:", script)
1620           if test:
1621             output = indent_re.sub('      ', output)
1622             feedback_fn("%s" % output)
1623             lu_result = 1
1624
1625       return lu_result
1626
1627
1628 class LUVerifyDisks(NoHooksLU):
1629   """Verifies the cluster disks status.
1630
1631   """
1632   _OP_REQP = []
1633   REQ_BGL = False
1634
1635   def ExpandNames(self):
1636     self.needed_locks = {
1637       locking.LEVEL_NODE: locking.ALL_SET,
1638       locking.LEVEL_INSTANCE: locking.ALL_SET,
1639     }
1640     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1641
1642   def CheckPrereq(self):
1643     """Check prerequisites.
1644
1645     This has no prerequisites.
1646
1647     """
1648     pass
1649
1650   def Exec(self, feedback_fn):
1651     """Verify integrity of cluster disks.
1652
1653     @rtype: tuple of three items
1654     @return: a tuple of (dict of node-to-node_error, list of instances
1655         which need activate-disks, dict of instance: (node, volume) for
1656         missing volumes
1657
1658     """
1659     result = res_nodes, res_instances, res_missing = {}, [], {}
1660
1661     vg_name = self.cfg.GetVGName()
1662     nodes = utils.NiceSort(self.cfg.GetNodeList())
1663     instances = [self.cfg.GetInstanceInfo(name)
1664                  for name in self.cfg.GetInstanceList()]
1665
1666     nv_dict = {}
1667     for inst in instances:
1668       inst_lvs = {}
1669       if (not inst.admin_up or
1670           inst.disk_template not in constants.DTS_NET_MIRROR):
1671         continue
1672       inst.MapLVsByNode(inst_lvs)
1673       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1674       for node, vol_list in inst_lvs.iteritems():
1675         for vol in vol_list:
1676           nv_dict[(node, vol)] = inst
1677
1678     if not nv_dict:
1679       return result
1680
1681     node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1682
1683     for node in nodes:
1684       # node_volume
1685       node_res = node_lvs[node]
1686       if node_res.offline:
1687         continue
1688       msg = node_res.fail_msg
1689       if msg:
1690         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1691         res_nodes[node] = msg
1692         continue
1693
1694       lvs = node_res.payload
1695       for lv_name, (_, _, lv_online) in lvs.items():
1696         inst = nv_dict.pop((node, lv_name), None)
1697         if (not lv_online and inst is not None
1698             and inst.name not in res_instances):
1699           res_instances.append(inst.name)
1700
1701     # any leftover items in nv_dict are missing LVs, let's arrange the
1702     # data better
1703     for key, inst in nv_dict.iteritems():
1704       if inst.name not in res_missing:
1705         res_missing[inst.name] = []
1706       res_missing[inst.name].append(key)
1707
1708     return result
1709
1710
1711 class LURepairDiskSizes(NoHooksLU):
1712   """Verifies the cluster disks sizes.
1713
1714   """
1715   _OP_REQP = ["instances"]
1716   REQ_BGL = False
1717
1718   def ExpandNames(self):
1719     if not isinstance(self.op.instances, list):
1720       raise errors.OpPrereqError("Invalid argument type 'instances'",
1721                                  errors.ECODE_INVAL)
1722
1723     if self.op.instances:
1724       self.wanted_names = []
1725       for name in self.op.instances:
1726         full_name = self.cfg.ExpandInstanceName(name)
1727         if full_name is None:
1728           raise errors.OpPrereqError("Instance '%s' not known" % name,
1729                                      errors.ECODE_NOENT)
1730         self.wanted_names.append(full_name)
1731       self.needed_locks = {
1732         locking.LEVEL_NODE: [],
1733         locking.LEVEL_INSTANCE: self.wanted_names,
1734         }
1735       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
1736     else:
1737       self.wanted_names = None
1738       self.needed_locks = {
1739         locking.LEVEL_NODE: locking.ALL_SET,
1740         locking.LEVEL_INSTANCE: locking.ALL_SET,
1741         }
1742     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
1743
1744   def DeclareLocks(self, level):
1745     if level == locking.LEVEL_NODE and self.wanted_names is not None:
1746       self._LockInstancesNodes(primary_only=True)
1747
1748   def CheckPrereq(self):
1749     """Check prerequisites.
1750
1751     This only checks the optional instance list against the existing names.
1752
1753     """
1754     if self.wanted_names is None:
1755       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
1756
1757     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
1758                              in self.wanted_names]
1759
1760   def _EnsureChildSizes(self, disk):
1761     """Ensure children of the disk have the needed disk size.
1762
1763     This is valid mainly for DRBD8 and fixes an issue where the
1764     children have smaller disk size.
1765
1766     @param disk: an L{ganeti.objects.Disk} object
1767
1768     """
1769     if disk.dev_type == constants.LD_DRBD8:
1770       assert disk.children, "Empty children for DRBD8?"
1771       fchild = disk.children[0]
1772       mismatch = fchild.size < disk.size
1773       if mismatch:
1774         self.LogInfo("Child disk has size %d, parent %d, fixing",
1775                      fchild.size, disk.size)
1776         fchild.size = disk.size
1777
1778       # and we recurse on this child only, not on the metadev
1779       return self._EnsureChildSizes(fchild) or mismatch
1780     else:
1781       return False
1782
1783   def Exec(self, feedback_fn):
1784     """Verify the size of cluster disks.
1785
1786     """
1787     # TODO: check child disks too
1788     # TODO: check differences in size between primary/secondary nodes
1789     per_node_disks = {}
1790     for instance in self.wanted_instances:
1791       pnode = instance.primary_node
1792       if pnode not in per_node_disks:
1793         per_node_disks[pnode] = []
1794       for idx, disk in enumerate(instance.disks):
1795         per_node_disks[pnode].append((instance, idx, disk))
1796
1797     changed = []
1798     for node, dskl in per_node_disks.items():
1799       newl = [v[2].Copy() for v in dskl]
1800       for dsk in newl:
1801         self.cfg.SetDiskID(dsk, node)
1802       result = self.rpc.call_blockdev_getsizes(node, newl)
1803       if result.fail_msg:
1804         self.LogWarning("Failure in blockdev_getsizes call to node"
1805                         " %s, ignoring", node)
1806         continue
1807       if len(result.data) != len(dskl):
1808         self.LogWarning("Invalid result from node %s, ignoring node results",
1809                         node)
1810         continue
1811       for ((instance, idx, disk), size) in zip(dskl, result.data):
1812         if size is None:
1813           self.LogWarning("Disk %d of instance %s did not return size"
1814                           " information, ignoring", idx, instance.name)
1815           continue
1816         if not isinstance(size, (int, long)):
1817           self.LogWarning("Disk %d of instance %s did not return valid"
1818                           " size information, ignoring", idx, instance.name)
1819           continue
1820         size = size >> 20
1821         if size != disk.size:
1822           self.LogInfo("Disk %d of instance %s has mismatched size,"
1823                        " correcting: recorded %d, actual %d", idx,
1824                        instance.name, disk.size, size)
1825           disk.size = size
1826           self.cfg.Update(instance, feedback_fn)
1827           changed.append((instance.name, idx, size))
1828         if self._EnsureChildSizes(disk):
1829           self.cfg.Update(instance, feedback_fn)
1830           changed.append((instance.name, idx, disk.size))
1831     return changed
1832
1833
1834 class LURenameCluster(LogicalUnit):
1835   """Rename the cluster.
1836
1837   """
1838   HPATH = "cluster-rename"
1839   HTYPE = constants.HTYPE_CLUSTER
1840   _OP_REQP = ["name"]
1841
1842   def BuildHooksEnv(self):
1843     """Build hooks env.
1844
1845     """
1846     env = {
1847       "OP_TARGET": self.cfg.GetClusterName(),
1848       "NEW_NAME": self.op.name,
1849       }
1850     mn = self.cfg.GetMasterNode()
1851     all_nodes = self.cfg.GetNodeList()
1852     return env, [mn], all_nodes
1853
1854   def CheckPrereq(self):
1855     """Verify that the passed name is a valid one.
1856
1857     """
1858     hostname = utils.GetHostInfo(self.op.name)
1859
1860     new_name = hostname.name
1861     self.ip = new_ip = hostname.ip
1862     old_name = self.cfg.GetClusterName()
1863     old_ip = self.cfg.GetMasterIP()
1864     if new_name == old_name and new_ip == old_ip:
1865       raise errors.OpPrereqError("Neither the name nor the IP address of the"
1866                                  " cluster has changed",
1867                                  errors.ECODE_INVAL)
1868     if new_ip != old_ip:
1869       if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1870         raise errors.OpPrereqError("The given cluster IP address (%s) is"
1871                                    " reachable on the network. Aborting." %
1872                                    new_ip, errors.ECODE_NOTUNIQUE)
1873
1874     self.op.name = new_name
1875
1876   def Exec(self, feedback_fn):
1877     """Rename the cluster.
1878
1879     """
1880     clustername = self.op.name
1881     ip = self.ip
1882
1883     # shutdown the master IP
1884     master = self.cfg.GetMasterNode()
1885     result = self.rpc.call_node_stop_master(master, False)
1886     result.Raise("Could not disable the master role")
1887
1888     try:
1889       cluster = self.cfg.GetClusterInfo()
1890       cluster.cluster_name = clustername
1891       cluster.master_ip = ip
1892       self.cfg.Update(cluster, feedback_fn)
1893
1894       # update the known hosts file
1895       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1896       node_list = self.cfg.GetNodeList()
1897       try:
1898         node_list.remove(master)
1899       except ValueError:
1900         pass
1901       result = self.rpc.call_upload_file(node_list,
1902                                          constants.SSH_KNOWN_HOSTS_FILE)
1903       for to_node, to_result in result.iteritems():
1904         msg = to_result.fail_msg
1905         if msg:
1906           msg = ("Copy of file %s to node %s failed: %s" %
1907                  (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1908           self.proc.LogWarning(msg)
1909
1910     finally:
1911       result = self.rpc.call_node_start_master(master, False, False)
1912       msg = result.fail_msg
1913       if msg:
1914         self.LogWarning("Could not re-enable the master role on"
1915                         " the master, please restart manually: %s", msg)
1916
1917
1918 def _RecursiveCheckIfLVMBased(disk):
1919   """Check if the given disk or its children are lvm-based.
1920
1921   @type disk: L{objects.Disk}
1922   @param disk: the disk to check
1923   @rtype: boolean
1924   @return: boolean indicating whether a LD_LV dev_type was found or not
1925
1926   """
1927   if disk.children:
1928     for chdisk in disk.children:
1929       if _RecursiveCheckIfLVMBased(chdisk):
1930         return True
1931   return disk.dev_type == constants.LD_LV
1932
1933
1934 class LUSetClusterParams(LogicalUnit):
1935   """Change the parameters of the cluster.
1936
1937   """
1938   HPATH = "cluster-modify"
1939   HTYPE = constants.HTYPE_CLUSTER
1940   _OP_REQP = []
1941   REQ_BGL = False
1942
1943   def CheckArguments(self):
1944     """Check parameters
1945
1946     """
1947     if not hasattr(self.op, "candidate_pool_size"):
1948       self.op.candidate_pool_size = None
1949     if self.op.candidate_pool_size is not None:
1950       try:
1951         self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1952       except (ValueError, TypeError), err:
1953         raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1954                                    str(err), errors.ECODE_INVAL)
1955       if self.op.candidate_pool_size < 1:
1956         raise errors.OpPrereqError("At least one master candidate needed",
1957                                    errors.ECODE_INVAL)
1958
1959   def ExpandNames(self):
1960     # FIXME: in the future maybe other cluster params won't require checking on
1961     # all nodes to be modified.
1962     self.needed_locks = {
1963       locking.LEVEL_NODE: locking.ALL_SET,
1964     }
1965     self.share_locks[locking.LEVEL_NODE] = 1
1966
1967   def BuildHooksEnv(self):
1968     """Build hooks env.
1969
1970     """
1971     env = {
1972       "OP_TARGET": self.cfg.GetClusterName(),
1973       "NEW_VG_NAME": self.op.vg_name,
1974       }
1975     mn = self.cfg.GetMasterNode()
1976     return env, [mn], [mn]
1977
1978   def CheckPrereq(self):
1979     """Check prerequisites.
1980
1981     This checks whether the given params don't conflict and
1982     if the given volume group is valid.
1983
1984     """
1985     if self.op.vg_name is not None and not self.op.vg_name:
1986       instances = self.cfg.GetAllInstancesInfo().values()
1987       for inst in instances:
1988         for disk in inst.disks:
1989           if _RecursiveCheckIfLVMBased(disk):
1990             raise errors.OpPrereqError("Cannot disable lvm storage while"
1991                                        " lvm-based instances exist",
1992                                        errors.ECODE_INVAL)
1993
1994     node_list = self.acquired_locks[locking.LEVEL_NODE]
1995
1996     # if vg_name not None, checks given volume group on all nodes
1997     if self.op.vg_name:
1998       vglist = self.rpc.call_vg_list(node_list)
1999       for node in node_list:
2000         msg = vglist[node].fail_msg
2001         if msg:
2002           # ignoring down node
2003           self.LogWarning("Error while gathering data on node %s"
2004                           " (ignoring node): %s", node, msg)
2005           continue
2006         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2007                                               self.op.vg_name,
2008                                               constants.MIN_VG_SIZE)
2009         if vgstatus:
2010           raise errors.OpPrereqError("Error on node '%s': %s" %
2011                                      (node, vgstatus), errors.ECODE_ENVIRON)
2012
2013     self.cluster = cluster = self.cfg.GetClusterInfo()
2014     # validate params changes
2015     if self.op.beparams:
2016       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2017       self.new_beparams = objects.FillDict(
2018         cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2019
2020     if self.op.nicparams:
2021       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2022       self.new_nicparams = objects.FillDict(
2023         cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2024       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2025       nic_errors = []
2026
2027       # check all instances for consistency
2028       for instance in self.cfg.GetAllInstancesInfo().values():
2029         for nic_idx, nic in enumerate(instance.nics):
2030           params_copy = copy.deepcopy(nic.nicparams)
2031           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2032
2033           # check parameter syntax
2034           try:
2035             objects.NIC.CheckParameterSyntax(params_filled)
2036           except errors.ConfigurationError, err:
2037             nic_errors.append("Instance %s, nic/%d: %s" %
2038                               (instance.name, nic_idx, err))
2039
2040           # if we're moving instances to routed, check that they have an ip
2041           target_mode = params_filled[constants.NIC_MODE]
2042           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2043             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2044                               (instance.name, nic_idx))
2045       if nic_errors:
2046         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2047                                    "\n".join(nic_errors))
2048
2049     # hypervisor list/parameters
2050     self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2051     if self.op.hvparams:
2052       if not isinstance(self.op.hvparams, dict):
2053         raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2054                                    errors.ECODE_INVAL)
2055       for hv_name, hv_dict in self.op.hvparams.items():
2056         if hv_name not in self.new_hvparams:
2057           self.new_hvparams[hv_name] = hv_dict
2058         else:
2059           self.new_hvparams[hv_name].update(hv_dict)
2060
2061     if self.op.enabled_hypervisors is not None:
2062       self.hv_list = self.op.enabled_hypervisors
2063       if not self.hv_list:
2064         raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2065                                    " least one member",
2066                                    errors.ECODE_INVAL)
2067       invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2068       if invalid_hvs:
2069         raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2070                                    " entries: %s" %
2071                                    utils.CommaJoin(invalid_hvs),
2072                                    errors.ECODE_INVAL)
2073     else:
2074       self.hv_list = cluster.enabled_hypervisors
2075
2076     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2077       # either the enabled list has changed, or the parameters have, validate
2078       for hv_name, hv_params in self.new_hvparams.items():
2079         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2080             (self.op.enabled_hypervisors and
2081              hv_name in self.op.enabled_hypervisors)):
2082           # either this is a new hypervisor, or its parameters have changed
2083           hv_class = hypervisor.GetHypervisor(hv_name)
2084           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2085           hv_class.CheckParameterSyntax(hv_params)
2086           _CheckHVParams(self, node_list, hv_name, hv_params)
2087
2088   def Exec(self, feedback_fn):
2089     """Change the parameters of the cluster.
2090
2091     """
2092     if self.op.vg_name is not None:
2093       new_volume = self.op.vg_name
2094       if not new_volume:
2095         new_volume = None
2096       if new_volume != self.cfg.GetVGName():
2097         self.cfg.SetVGName(new_volume)
2098       else:
2099         feedback_fn("Cluster LVM configuration already in desired"
2100                     " state, not changing")
2101     if self.op.hvparams:
2102       self.cluster.hvparams = self.new_hvparams
2103     if self.op.enabled_hypervisors is not None:
2104       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2105     if self.op.beparams:
2106       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2107     if self.op.nicparams:
2108       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2109
2110     if self.op.candidate_pool_size is not None:
2111       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2112       # we need to update the pool size here, otherwise the save will fail
2113       _AdjustCandidatePool(self, [])
2114
2115     self.cfg.Update(self.cluster, feedback_fn)
2116
2117
2118 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2119   """Distribute additional files which are part of the cluster configuration.
2120
2121   ConfigWriter takes care of distributing the config and ssconf files, but
2122   there are more files which should be distributed to all nodes. This function
2123   makes sure those are copied.
2124
2125   @param lu: calling logical unit
2126   @param additional_nodes: list of nodes not in the config to distribute to
2127
2128   """
2129   # 1. Gather target nodes
2130   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2131   dist_nodes = lu.cfg.GetNodeList()
2132   if additional_nodes is not None:
2133     dist_nodes.extend(additional_nodes)
2134   if myself.name in dist_nodes:
2135     dist_nodes.remove(myself.name)
2136
2137   # 2. Gather files to distribute
2138   dist_files = set([constants.ETC_HOSTS,
2139                     constants.SSH_KNOWN_HOSTS_FILE,
2140                     constants.RAPI_CERT_FILE,
2141                     constants.RAPI_USERS_FILE,
2142                     constants.HMAC_CLUSTER_KEY,
2143                    ])
2144
2145   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2146   for hv_name in enabled_hypervisors:
2147     hv_class = hypervisor.GetHypervisor(hv_name)
2148     dist_files.update(hv_class.GetAncillaryFiles())
2149
2150   # 3. Perform the files upload
2151   for fname in dist_files:
2152     if os.path.exists(fname):
2153       result = lu.rpc.call_upload_file(dist_nodes, fname)
2154       for to_node, to_result in result.items():
2155         msg = to_result.fail_msg
2156         if msg:
2157           msg = ("Copy of file %s to node %s failed: %s" %
2158                  (fname, to_node, msg))
2159           lu.proc.LogWarning(msg)
2160
2161
2162 class LURedistributeConfig(NoHooksLU):
2163   """Force the redistribution of cluster configuration.
2164
2165   This is a very simple LU.
2166
2167   """
2168   _OP_REQP = []
2169   REQ_BGL = False
2170
2171   def ExpandNames(self):
2172     self.needed_locks = {
2173       locking.LEVEL_NODE: locking.ALL_SET,
2174     }
2175     self.share_locks[locking.LEVEL_NODE] = 1
2176
2177   def CheckPrereq(self):
2178     """Check prerequisites.
2179
2180     """
2181
2182   def Exec(self, feedback_fn):
2183     """Redistribute the configuration.
2184
2185     """
2186     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2187     _RedistributeAncillaryFiles(self)
2188
2189
2190 def _WaitForSync(lu, instance, oneshot=False):
2191   """Sleep and poll for an instance's disk to sync.
2192
2193   """
2194   if not instance.disks:
2195     return True
2196
2197   if not oneshot:
2198     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2199
2200   node = instance.primary_node
2201
2202   for dev in instance.disks:
2203     lu.cfg.SetDiskID(dev, node)
2204
2205   # TODO: Convert to utils.Retry
2206
2207   retries = 0
2208   degr_retries = 10 # in seconds, as we sleep 1 second each time
2209   while True:
2210     max_time = 0
2211     done = True
2212     cumul_degraded = False
2213     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2214     msg = rstats.fail_msg
2215     if msg:
2216       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2217       retries += 1
2218       if retries >= 10:
2219         raise errors.RemoteError("Can't contact node %s for mirror data,"
2220                                  " aborting." % node)
2221       time.sleep(6)
2222       continue
2223     rstats = rstats.payload
2224     retries = 0
2225     for i, mstat in enumerate(rstats):
2226       if mstat is None:
2227         lu.LogWarning("Can't compute data for node %s/%s",
2228                            node, instance.disks[i].iv_name)
2229         continue
2230
2231       cumul_degraded = (cumul_degraded or
2232                         (mstat.is_degraded and mstat.sync_percent is None))
2233       if mstat.sync_percent is not None:
2234         done = False
2235         if mstat.estimated_time is not None:
2236           rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2237           max_time = mstat.estimated_time
2238         else:
2239           rem_time = "no time estimate"
2240         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2241                         (instance.disks[i].iv_name, mstat.sync_percent,
2242                          rem_time))
2243
2244     # if we're done but degraded, let's do a few small retries, to
2245     # make sure we see a stable and not transient situation; therefore
2246     # we force restart of the loop
2247     if (done or oneshot) and cumul_degraded and degr_retries > 0:
2248       logging.info("Degraded disks found, %d retries left", degr_retries)
2249       degr_retries -= 1
2250       time.sleep(1)
2251       continue
2252
2253     if done or oneshot:
2254       break
2255
2256     time.sleep(min(60, max_time))
2257
2258   if done:
2259     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2260   return not cumul_degraded
2261
2262
2263 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2264   """Check that mirrors are not degraded.
2265
2266   The ldisk parameter, if True, will change the test from the
2267   is_degraded attribute (which represents overall non-ok status for
2268   the device(s)) to the ldisk (representing the local storage status).
2269
2270   """
2271   lu.cfg.SetDiskID(dev, node)
2272
2273   result = True
2274
2275   if on_primary or dev.AssembleOnSecondary():
2276     rstats = lu.rpc.call_blockdev_find(node, dev)
2277     msg = rstats.fail_msg
2278     if msg:
2279       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2280       result = False
2281     elif not rstats.payload:
2282       lu.LogWarning("Can't find disk on node %s", node)
2283       result = False
2284     else:
2285       if ldisk:
2286         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2287       else:
2288         result = result and not rstats.payload.is_degraded
2289
2290   if dev.children:
2291     for child in dev.children:
2292       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2293
2294   return result
2295
2296
2297 class LUDiagnoseOS(NoHooksLU):
2298   """Logical unit for OS diagnose/query.
2299
2300   """
2301   _OP_REQP = ["output_fields", "names"]
2302   REQ_BGL = False
2303   _FIELDS_STATIC = utils.FieldSet()
2304   _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2305   # Fields that need calculation of global os validity
2306   _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2307
2308   def ExpandNames(self):
2309     if self.op.names:
2310       raise errors.OpPrereqError("Selective OS query not supported",
2311                                  errors.ECODE_INVAL)
2312
2313     _CheckOutputFields(static=self._FIELDS_STATIC,
2314                        dynamic=self._FIELDS_DYNAMIC,
2315                        selected=self.op.output_fields)
2316
2317     # Lock all nodes, in shared mode
2318     # Temporary removal of locks, should be reverted later
2319     # TODO: reintroduce locks when they are lighter-weight
2320     self.needed_locks = {}
2321     #self.share_locks[locking.LEVEL_NODE] = 1
2322     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2323
2324   def CheckPrereq(self):
2325     """Check prerequisites.
2326
2327     """
2328
2329   @staticmethod
2330   def _DiagnoseByOS(rlist):
2331     """Remaps a per-node return list into an a per-os per-node dictionary
2332
2333     @param rlist: a map with node names as keys and OS objects as values
2334
2335     @rtype: dict
2336     @return: a dictionary with osnames as keys and as value another map, with
2337         nodes as keys and tuples of (path, status, diagnose) as values, eg::
2338
2339           {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2340                                      (/srv/..., False, "invalid api")],
2341                            "node2": [(/srv/..., True, "")]}
2342           }
2343
2344     """
2345     all_os = {}
2346     # we build here the list of nodes that didn't fail the RPC (at RPC
2347     # level), so that nodes with a non-responding node daemon don't
2348     # make all OSes invalid
2349     good_nodes = [node_name for node_name in rlist
2350                   if not rlist[node_name].fail_msg]
2351     for node_name, nr in rlist.items():
2352       if nr.fail_msg or not nr.payload:
2353         continue
2354       for name, path, status, diagnose, variants in nr.payload:
2355         if name not in all_os:
2356           # build a list of nodes for this os containing empty lists
2357           # for each node in node_list
2358           all_os[name] = {}
2359           for nname in good_nodes:
2360             all_os[name][nname] = []
2361         all_os[name][node_name].append((path, status, diagnose, variants))
2362     return all_os
2363
2364   def Exec(self, feedback_fn):
2365     """Compute the list of OSes.
2366
2367     """
2368     valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2369     node_data = self.rpc.call_os_diagnose(valid_nodes)
2370     pol = self._DiagnoseByOS(node_data)
2371     output = []
2372     calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2373     calc_variants = "variants" in self.op.output_fields
2374
2375     for os_name, os_data in pol.items():
2376       row = []
2377       if calc_valid:
2378         valid = True
2379         variants = None
2380         for osl in os_data.values():
2381           valid = valid and osl and osl[0][1]
2382           if not valid:
2383             variants = None
2384             break
2385           if calc_variants:
2386             node_variants = osl[0][3]
2387             if variants is None:
2388               variants = node_variants
2389             else:
2390               variants = [v for v in variants if v in node_variants]
2391
2392       for field in self.op.output_fields:
2393         if field == "name":
2394           val = os_name
2395         elif field == "valid":
2396           val = valid
2397         elif field == "node_status":
2398           # this is just a copy of the dict
2399           val = {}
2400           for node_name, nos_list in os_data.items():
2401             val[node_name] = nos_list
2402         elif field == "variants":
2403           val =  variants
2404         else:
2405           raise errors.ParameterError(field)
2406         row.append(val)
2407       output.append(row)
2408
2409     return output
2410
2411
2412 class LURemoveNode(LogicalUnit):
2413   """Logical unit for removing a node.
2414
2415   """
2416   HPATH = "node-remove"
2417   HTYPE = constants.HTYPE_NODE
2418   _OP_REQP = ["node_name"]
2419
2420   def BuildHooksEnv(self):
2421     """Build hooks env.
2422
2423     This doesn't run on the target node in the pre phase as a failed
2424     node would then be impossible to remove.
2425
2426     """
2427     env = {
2428       "OP_TARGET": self.op.node_name,
2429       "NODE_NAME": self.op.node_name,
2430       }
2431     all_nodes = self.cfg.GetNodeList()
2432     try:
2433       all_nodes.remove(self.op.node_name)
2434     except ValueError:
2435       logging.warning("Node %s which is about to be removed not found"
2436                       " in the all nodes list", self.op.node_name)
2437     return env, all_nodes, all_nodes
2438
2439   def CheckPrereq(self):
2440     """Check prerequisites.
2441
2442     This checks:
2443      - the node exists in the configuration
2444      - it does not have primary or secondary instances
2445      - it's not the master
2446
2447     Any errors are signaled by raising errors.OpPrereqError.
2448
2449     """
2450     node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
2451     if node is None:
2452       raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name,
2453                                  errors.ECODE_NOENT)
2454
2455     instance_list = self.cfg.GetInstanceList()
2456
2457     masternode = self.cfg.GetMasterNode()
2458     if node.name == masternode:
2459       raise errors.OpPrereqError("Node is the master node,"
2460                                  " you need to failover first.",
2461                                  errors.ECODE_INVAL)
2462
2463     for instance_name in instance_list:
2464       instance = self.cfg.GetInstanceInfo(instance_name)
2465       if node.name in instance.all_nodes:
2466         raise errors.OpPrereqError("Instance %s is still running on the node,"
2467                                    " please remove first." % instance_name,
2468                                    errors.ECODE_INVAL)
2469     self.op.node_name = node.name
2470     self.node = node
2471
2472   def Exec(self, feedback_fn):
2473     """Removes the node from the cluster.
2474
2475     """
2476     node = self.node
2477     logging.info("Stopping the node daemon and removing configs from node %s",
2478                  node.name)
2479
2480     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2481
2482     # Promote nodes to master candidate as needed
2483     _AdjustCandidatePool(self, exceptions=[node.name])
2484     self.context.RemoveNode(node.name)
2485
2486     # Run post hooks on the node before it's removed
2487     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2488     try:
2489       hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2490     except:
2491       # pylint: disable-msg=W0702
2492       self.LogWarning("Errors occurred running hooks on %s" % node.name)
2493
2494     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2495     msg = result.fail_msg
2496     if msg:
2497       self.LogWarning("Errors encountered on the remote node while leaving"
2498                       " the cluster: %s", msg)
2499
2500
2501 class LUQueryNodes(NoHooksLU):
2502   """Logical unit for querying nodes.
2503
2504   """
2505   # pylint: disable-msg=W0142
2506   _OP_REQP = ["output_fields", "names", "use_locking"]
2507   REQ_BGL = False
2508
2509   _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2510                     "master_candidate", "offline", "drained"]
2511
2512   _FIELDS_DYNAMIC = utils.FieldSet(
2513     "dtotal", "dfree",
2514     "mtotal", "mnode", "mfree",
2515     "bootid",
2516     "ctotal", "cnodes", "csockets",
2517     )
2518
2519   _FIELDS_STATIC = utils.FieldSet(*[
2520     "pinst_cnt", "sinst_cnt",
2521     "pinst_list", "sinst_list",
2522     "pip", "sip", "tags",
2523     "master",
2524     "role"] + _SIMPLE_FIELDS
2525     )
2526
2527   def ExpandNames(self):
2528     _CheckOutputFields(static=self._FIELDS_STATIC,
2529                        dynamic=self._FIELDS_DYNAMIC,
2530                        selected=self.op.output_fields)
2531
2532     self.needed_locks = {}
2533     self.share_locks[locking.LEVEL_NODE] = 1
2534
2535     if self.op.names:
2536       self.wanted = _GetWantedNodes(self, self.op.names)
2537     else:
2538       self.wanted = locking.ALL_SET
2539
2540     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2541     self.do_locking = self.do_node_query and self.op.use_locking
2542     if self.do_locking:
2543       # if we don't request only static fields, we need to lock the nodes
2544       self.needed_locks[locking.LEVEL_NODE] = self.wanted
2545
2546   def CheckPrereq(self):
2547     """Check prerequisites.
2548
2549     """
2550     # The validation of the node list is done in the _GetWantedNodes,
2551     # if non empty, and if empty, there's no validation to do
2552     pass
2553
2554   def Exec(self, feedback_fn):
2555     """Computes the list of nodes and their attributes.
2556
2557     """
2558     all_info = self.cfg.GetAllNodesInfo()
2559     if self.do_locking:
2560       nodenames = self.acquired_locks[locking.LEVEL_NODE]
2561     elif self.wanted != locking.ALL_SET:
2562       nodenames = self.wanted
2563       missing = set(nodenames).difference(all_info.keys())
2564       if missing:
2565         raise errors.OpExecError(
2566           "Some nodes were removed before retrieving their data: %s" % missing)
2567     else:
2568       nodenames = all_info.keys()
2569
2570     nodenames = utils.NiceSort(nodenames)
2571     nodelist = [all_info[name] for name in nodenames]
2572
2573     # begin data gathering
2574
2575     if self.do_node_query:
2576       live_data = {}
2577       node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2578                                           self.cfg.GetHypervisorType())
2579       for name in nodenames:
2580         nodeinfo = node_data[name]
2581         if not nodeinfo.fail_msg and nodeinfo.payload:
2582           nodeinfo = nodeinfo.payload
2583           fn = utils.TryConvert
2584           live_data[name] = {
2585             "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2586             "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2587             "mfree": fn(int, nodeinfo.get('memory_free', None)),
2588             "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2589             "dfree": fn(int, nodeinfo.get('vg_free', None)),
2590             "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2591             "bootid": nodeinfo.get('bootid', None),
2592             "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2593             "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2594             }
2595         else:
2596           live_data[name] = {}
2597     else:
2598       live_data = dict.fromkeys(nodenames, {})
2599
2600     node_to_primary = dict([(name, set()) for name in nodenames])
2601     node_to_secondary = dict([(name, set()) for name in nodenames])
2602
2603     inst_fields = frozenset(("pinst_cnt", "pinst_list",
2604                              "sinst_cnt", "sinst_list"))
2605     if inst_fields & frozenset(self.op.output_fields):
2606       inst_data = self.cfg.GetAllInstancesInfo()
2607
2608       for inst in inst_data.values():
2609         if inst.primary_node in node_to_primary:
2610           node_to_primary[inst.primary_node].add(inst.name)
2611         for secnode in inst.secondary_nodes:
2612           if secnode in node_to_secondary:
2613             node_to_secondary[secnode].add(inst.name)
2614
2615     master_node = self.cfg.GetMasterNode()
2616
2617     # end data gathering
2618
2619     output = []
2620     for node in nodelist:
2621       node_output = []
2622       for field in self.op.output_fields:
2623         if field in self._SIMPLE_FIELDS:
2624           val = getattr(node, field)
2625         elif field == "pinst_list":
2626           val = list(node_to_primary[node.name])
2627         elif field == "sinst_list":
2628           val = list(node_to_secondary[node.name])
2629         elif field == "pinst_cnt":
2630           val = len(node_to_primary[node.name])
2631         elif field == "sinst_cnt":
2632           val = len(node_to_secondary[node.name])
2633         elif field == "pip":
2634           val = node.primary_ip
2635         elif field == "sip":
2636           val = node.secondary_ip
2637         elif field == "tags":
2638           val = list(node.GetTags())
2639         elif field == "master":
2640           val = node.name == master_node
2641         elif self._FIELDS_DYNAMIC.Matches(field):
2642           val = live_data[node.name].get(field, None)
2643         elif field == "role":
2644           if node.name == master_node:
2645             val = "M"
2646           elif node.master_candidate:
2647             val = "C"
2648           elif node.drained:
2649             val = "D"
2650           elif node.offline:
2651             val = "O"
2652           else:
2653             val = "R"
2654         else:
2655           raise errors.ParameterError(field)
2656         node_output.append(val)
2657       output.append(node_output)
2658
2659     return output
2660
2661
2662 class LUQueryNodeVolumes(NoHooksLU):
2663   """Logical unit for getting volumes on node(s).
2664
2665   """
2666   _OP_REQP = ["nodes", "output_fields"]
2667   REQ_BGL = False
2668   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2669   _FIELDS_STATIC = utils.FieldSet("node")
2670
2671   def ExpandNames(self):
2672     _CheckOutputFields(static=self._FIELDS_STATIC,
2673                        dynamic=self._FIELDS_DYNAMIC,
2674                        selected=self.op.output_fields)
2675
2676     self.needed_locks = {}
2677     self.share_locks[locking.LEVEL_NODE] = 1
2678     if not self.op.nodes:
2679       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2680     else:
2681       self.needed_locks[locking.LEVEL_NODE] = \
2682         _GetWantedNodes(self, self.op.nodes)
2683
2684   def CheckPrereq(self):
2685     """Check prerequisites.
2686
2687     This checks that the fields required are valid output fields.
2688
2689     """
2690     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2691
2692   def Exec(self, feedback_fn):
2693     """Computes the list of nodes and their attributes.
2694
2695     """
2696     nodenames = self.nodes
2697     volumes = self.rpc.call_node_volumes(nodenames)
2698
2699     ilist = [self.cfg.GetInstanceInfo(iname) for iname
2700              in self.cfg.GetInstanceList()]
2701
2702     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2703
2704     output = []
2705     for node in nodenames:
2706       nresult = volumes[node]
2707       if nresult.offline:
2708         continue
2709       msg = nresult.fail_msg
2710       if msg:
2711         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2712         continue
2713
2714       node_vols = nresult.payload[:]
2715       node_vols.sort(key=lambda vol: vol['dev'])
2716
2717       for vol in node_vols:
2718         node_output = []
2719         for field in self.op.output_fields:
2720           if field == "node":
2721             val = node
2722           elif field == "phys":
2723             val = vol['dev']
2724           elif field == "vg":
2725             val = vol['vg']
2726           elif field == "name":
2727             val = vol['name']
2728           elif field == "size":
2729             val = int(float(vol['size']))
2730           elif field == "instance":
2731             for inst in ilist:
2732               if node not in lv_by_node[inst]:
2733                 continue
2734               if vol['name'] in lv_by_node[inst][node]:
2735                 val = inst.name
2736                 break
2737             else:
2738               val = '-'
2739           else:
2740             raise errors.ParameterError(field)
2741           node_output.append(str(val))
2742
2743         output.append(node_output)
2744
2745     return output
2746
2747
2748 class LUQueryNodeStorage(NoHooksLU):
2749   """Logical unit for getting information on storage units on node(s).
2750
2751   """
2752   _OP_REQP = ["nodes", "storage_type", "output_fields"]
2753   REQ_BGL = False
2754   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
2755
2756   def ExpandNames(self):
2757     storage_type = self.op.storage_type
2758
2759     if storage_type not in constants.VALID_STORAGE_TYPES:
2760       raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2761                                  errors.ECODE_INVAL)
2762
2763     _CheckOutputFields(static=self._FIELDS_STATIC,
2764                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
2765                        selected=self.op.output_fields)
2766
2767     self.needed_locks = {}
2768     self.share_locks[locking.LEVEL_NODE] = 1
2769
2770     if self.op.nodes:
2771       self.needed_locks[locking.LEVEL_NODE] = \
2772         _GetWantedNodes(self, self.op.nodes)
2773     else:
2774       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2775
2776   def CheckPrereq(self):
2777     """Check prerequisites.
2778
2779     This checks that the fields required are valid output fields.
2780
2781     """
2782     self.op.name = getattr(self.op, "name", None)
2783
2784     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2785
2786   def Exec(self, feedback_fn):
2787     """Computes the list of nodes and their attributes.
2788
2789     """
2790     # Always get name to sort by
2791     if constants.SF_NAME in self.op.output_fields:
2792       fields = self.op.output_fields[:]
2793     else:
2794       fields = [constants.SF_NAME] + self.op.output_fields
2795
2796     # Never ask for node or type as it's only known to the LU
2797     for extra in [constants.SF_NODE, constants.SF_TYPE]:
2798       while extra in fields:
2799         fields.remove(extra)
2800
2801     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2802     name_idx = field_idx[constants.SF_NAME]
2803
2804     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2805     data = self.rpc.call_storage_list(self.nodes,
2806                                       self.op.storage_type, st_args,
2807                                       self.op.name, fields)
2808
2809     result = []
2810
2811     for node in utils.NiceSort(self.nodes):
2812       nresult = data[node]
2813       if nresult.offline:
2814         continue
2815
2816       msg = nresult.fail_msg
2817       if msg:
2818         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2819         continue
2820
2821       rows = dict([(row[name_idx], row) for row in nresult.payload])
2822
2823       for name in utils.NiceSort(rows.keys()):
2824         row = rows[name]
2825
2826         out = []
2827
2828         for field in self.op.output_fields:
2829           if field == constants.SF_NODE:
2830             val = node
2831           elif field == constants.SF_TYPE:
2832             val = self.op.storage_type
2833           elif field in field_idx:
2834             val = row[field_idx[field]]
2835           else:
2836             raise errors.ParameterError(field)
2837
2838           out.append(val)
2839
2840         result.append(out)
2841
2842     return result
2843
2844
2845 class LUModifyNodeStorage(NoHooksLU):
2846   """Logical unit for modifying a storage volume on a node.
2847
2848   """
2849   _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2850   REQ_BGL = False
2851
2852   def CheckArguments(self):
2853     node_name = self.cfg.ExpandNodeName(self.op.node_name)
2854     if node_name is None:
2855       raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
2856                                  errors.ECODE_NOENT)
2857
2858     self.op.node_name = node_name
2859
2860     storage_type = self.op.storage_type
2861     if storage_type not in constants.VALID_STORAGE_TYPES:
2862       raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2863                                  errors.ECODE_INVAL)
2864
2865   def ExpandNames(self):
2866     self.needed_locks = {
2867       locking.LEVEL_NODE: self.op.node_name,
2868       }
2869
2870   def CheckPrereq(self):
2871     """Check prerequisites.
2872
2873     """
2874     storage_type = self.op.storage_type
2875
2876     try:
2877       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2878     except KeyError:
2879       raise errors.OpPrereqError("Storage units of type '%s' can not be"
2880                                  " modified" % storage_type,
2881                                  errors.ECODE_INVAL)
2882
2883     diff = set(self.op.changes.keys()) - modifiable
2884     if diff:
2885       raise errors.OpPrereqError("The following fields can not be modified for"
2886                                  " storage units of type '%s': %r" %
2887                                  (storage_type, list(diff)),
2888                                  errors.ECODE_INVAL)
2889
2890   def Exec(self, feedback_fn):
2891     """Computes the list of nodes and their attributes.
2892
2893     """
2894     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2895     result = self.rpc.call_storage_modify(self.op.node_name,
2896                                           self.op.storage_type, st_args,
2897                                           self.op.name, self.op.changes)
2898     result.Raise("Failed to modify storage unit '%s' on %s" %
2899                  (self.op.name, self.op.node_name))
2900
2901
2902 class LUAddNode(LogicalUnit):
2903   """Logical unit for adding node to the cluster.
2904
2905   """
2906   HPATH = "node-add"
2907   HTYPE = constants.HTYPE_NODE
2908   _OP_REQP = ["node_name"]
2909
2910   def BuildHooksEnv(self):
2911     """Build hooks env.
2912
2913     This will run on all nodes before, and on all nodes + the new node after.
2914
2915     """
2916     env = {
2917       "OP_TARGET": self.op.node_name,
2918       "NODE_NAME": self.op.node_name,
2919       "NODE_PIP": self.op.primary_ip,
2920       "NODE_SIP": self.op.secondary_ip,
2921       }
2922     nodes_0 = self.cfg.GetNodeList()
2923     nodes_1 = nodes_0 + [self.op.node_name, ]
2924     return env, nodes_0, nodes_1
2925
2926   def CheckPrereq(self):
2927     """Check prerequisites.
2928
2929     This checks:
2930      - the new node is not already in the config
2931      - it is resolvable
2932      - its parameters (single/dual homed) matches the cluster
2933
2934     Any errors are signaled by raising errors.OpPrereqError.
2935
2936     """
2937     node_name = self.op.node_name
2938     cfg = self.cfg
2939
2940     dns_data = utils.GetHostInfo(node_name)
2941
2942     node = dns_data.name
2943     primary_ip = self.op.primary_ip = dns_data.ip
2944     secondary_ip = getattr(self.op, "secondary_ip", None)
2945     if secondary_ip is None:
2946       secondary_ip = primary_ip
2947     if not utils.IsValidIP(secondary_ip):
2948       raise errors.OpPrereqError("Invalid secondary IP given",
2949                                  errors.ECODE_INVAL)
2950     self.op.secondary_ip = secondary_ip
2951
2952     node_list = cfg.GetNodeList()
2953     if not self.op.readd and node in node_list:
2954       raise errors.OpPrereqError("Node %s is already in the configuration" %
2955                                  node, errors.ECODE_EXISTS)
2956     elif self.op.readd and node not in node_list:
2957       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
2958                                  errors.ECODE_NOENT)
2959
2960     for existing_node_name in node_list:
2961       existing_node = cfg.GetNodeInfo(existing_node_name)
2962
2963       if self.op.readd and node == existing_node_name:
2964         if (existing_node.primary_ip != primary_ip or
2965             existing_node.secondary_ip != secondary_ip):
2966           raise errors.OpPrereqError("Readded node doesn't have the same IP"
2967                                      " address configuration as before",
2968                                      errors.ECODE_INVAL)
2969         continue
2970
2971       if (existing_node.primary_ip == primary_ip or
2972           existing_node.secondary_ip == primary_ip or
2973           existing_node.primary_ip == secondary_ip or
2974           existing_node.secondary_ip == secondary_ip):
2975         raise errors.OpPrereqError("New node ip address(es) conflict with"
2976                                    " existing node %s" % existing_node.name,
2977                                    errors.ECODE_NOTUNIQUE)
2978
2979     # check that the type of the node (single versus dual homed) is the
2980     # same as for the master
2981     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2982     master_singlehomed = myself.secondary_ip == myself.primary_ip
2983     newbie_singlehomed = secondary_ip == primary_ip
2984     if master_singlehomed != newbie_singlehomed:
2985       if master_singlehomed:
2986         raise errors.OpPrereqError("The master has no private ip but the"
2987                                    " new node has one",
2988                                    errors.ECODE_INVAL)
2989       else:
2990         raise errors.OpPrereqError("The master has a private ip but the"
2991                                    " new node doesn't have one",
2992                                    errors.ECODE_INVAL)
2993
2994     # checks reachability
2995     if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2996       raise errors.OpPrereqError("Node not reachable by ping",
2997                                  errors.ECODE_ENVIRON)
2998
2999     if not newbie_singlehomed:
3000       # check reachability from my secondary ip to newbie's secondary ip
3001       if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3002                            source=myself.secondary_ip):
3003         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3004                                    " based ping to noded port",
3005                                    errors.ECODE_ENVIRON)
3006
3007     if self.op.readd:
3008       exceptions = [node]
3009     else:
3010       exceptions = []
3011
3012     self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3013
3014     if self.op.readd:
3015       self.new_node = self.cfg.GetNodeInfo(node)
3016       assert self.new_node is not None, "Can't retrieve locked node %s" % node
3017     else:
3018       self.new_node = objects.Node(name=node,
3019                                    primary_ip=primary_ip,
3020                                    secondary_ip=secondary_ip,
3021                                    master_candidate=self.master_candidate,
3022                                    offline=False, drained=False)
3023
3024   def Exec(self, feedback_fn):
3025     """Adds the new node to the cluster.
3026
3027     """
3028     new_node = self.new_node
3029     node = new_node.name
3030
3031     # for re-adds, reset the offline/drained/master-candidate flags;
3032     # we need to reset here, otherwise offline would prevent RPC calls
3033     # later in the procedure; this also means that if the re-add
3034     # fails, we are left with a non-offlined, broken node
3035     if self.op.readd:
3036       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3037       self.LogInfo("Readding a node, the offline/drained flags were reset")
3038       # if we demote the node, we do cleanup later in the procedure
3039       new_node.master_candidate = self.master_candidate
3040
3041     # notify the user about any possible mc promotion
3042     if new_node.master_candidate:
3043       self.LogInfo("Node will be a master candidate")
3044
3045     # check connectivity
3046     result = self.rpc.call_version([node])[node]
3047     result.Raise("Can't get version information from node %s" % node)
3048     if constants.PROTOCOL_VERSION == result.payload:
3049       logging.info("Communication to node %s fine, sw version %s match",
3050                    node, result.payload)
3051     else:
3052       raise errors.OpExecError("Version mismatch master version %s,"
3053                                " node version %s" %
3054                                (constants.PROTOCOL_VERSION, result.payload))
3055
3056     # setup ssh on node
3057     if self.cfg.GetClusterInfo().modify_ssh_setup:
3058       logging.info("Copy ssh key to node %s", node)
3059       priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3060       keyarray = []
3061       keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3062                   constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3063                   priv_key, pub_key]
3064
3065       for i in keyfiles:
3066         keyarray.append(utils.ReadFile(i))
3067
3068       result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3069                                       keyarray[2], keyarray[3], keyarray[4],
3070                                       keyarray[5])
3071       result.Raise("Cannot transfer ssh keys to the new node")
3072
3073     # Add node to our /etc/hosts, and add key to known_hosts
3074     if self.cfg.GetClusterInfo().modify_etc_hosts:
3075       utils.AddHostToEtcHosts(new_node.name)
3076
3077     if new_node.secondary_ip != new_node.primary_ip:
3078       result = self.rpc.call_node_has_ip_address(new_node.name,
3079                                                  new_node.secondary_ip)
3080       result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3081                    prereq=True, ecode=errors.ECODE_ENVIRON)
3082       if not result.payload:
3083         raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3084                                  " you gave (%s). Please fix and re-run this"
3085                                  " command." % new_node.secondary_ip)
3086
3087     node_verify_list = [self.cfg.GetMasterNode()]
3088     node_verify_param = {
3089       constants.NV_NODELIST: [node],
3090       # TODO: do a node-net-test as well?
3091     }
3092
3093     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3094                                        self.cfg.GetClusterName())
3095     for verifier in node_verify_list:
3096       result[verifier].Raise("Cannot communicate with node %s" % verifier)
3097       nl_payload = result[verifier].payload[constants.NV_NODELIST]
3098       if nl_payload:
3099         for failed in nl_payload:
3100           feedback_fn("ssh/hostname verification failed"
3101                       " (checking from %s): %s" %
3102                       (verifier, nl_payload[failed]))
3103         raise errors.OpExecError("ssh/hostname verification failed.")
3104
3105     if self.op.readd:
3106       _RedistributeAncillaryFiles(self)
3107       self.context.ReaddNode(new_node)
3108       # make sure we redistribute the config
3109       self.cfg.Update(new_node, feedback_fn)
3110       # and make sure the new node will not have old files around
3111       if not new_node.master_candidate:
3112         result = self.rpc.call_node_demote_from_mc(new_node.name)
3113         msg = result.fail_msg
3114         if msg:
3115           self.LogWarning("Node failed to demote itself from master"
3116                           " candidate status: %s" % msg)
3117     else:
3118       _RedistributeAncillaryFiles(self, additional_nodes=[node])
3119       self.context.AddNode(new_node, self.proc.GetECId())
3120
3121
3122 class LUSetNodeParams(LogicalUnit):
3123   """Modifies the parameters of a node.
3124
3125   """
3126   HPATH = "node-modify"
3127   HTYPE = constants.HTYPE_NODE
3128   _OP_REQP = ["node_name"]
3129   REQ_BGL = False
3130
3131   def CheckArguments(self):
3132     node_name = self.cfg.ExpandNodeName(self.op.node_name)
3133     if node_name is None:
3134       raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
3135                                  errors.ECODE_INVAL)
3136     self.op.node_name = node_name
3137     _CheckBooleanOpField(self.op, 'master_candidate')
3138     _CheckBooleanOpField(self.op, 'offline')
3139     _CheckBooleanOpField(self.op, 'drained')
3140     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3141     if all_mods.count(None) == 3:
3142       raise errors.OpPrereqError("Please pass at least one modification",
3143                                  errors.ECODE_INVAL)
3144     if all_mods.count(True) > 1:
3145       raise errors.OpPrereqError("Can't set the node into more than one"
3146                                  " state at the same time",
3147                                  errors.ECODE_INVAL)
3148
3149   def ExpandNames(self):
3150     self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3151
3152   def BuildHooksEnv(self):
3153     """Build hooks env.
3154
3155     This runs on the master node.
3156
3157     """
3158     env = {
3159       "OP_TARGET": self.op.node_name,
3160       "MASTER_CANDIDATE": str(self.op.master_candidate),
3161       "OFFLINE": str(self.op.offline),
3162       "DRAINED": str(self.op.drained),
3163       }
3164     nl = [self.cfg.GetMasterNode(),
3165           self.op.node_name]
3166     return env, nl, nl
3167
3168   def CheckPrereq(self):
3169     """Check prerequisites.
3170
3171     This only checks the instance list against the existing names.
3172
3173     """
3174     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3175
3176     if (self.op.master_candidate is not None or
3177         self.op.drained is not None or
3178         self.op.offline is not None):
3179       # we can't change the master's node flags
3180       if self.op.node_name == self.cfg.GetMasterNode():
3181         raise errors.OpPrereqError("The master role can be changed"
3182                                    " only via masterfailover",
3183                                    errors.ECODE_INVAL)
3184
3185     # Boolean value that tells us whether we're offlining or draining the node
3186     offline_or_drain = self.op.offline == True or self.op.drained == True
3187     deoffline_or_drain = self.op.offline == False or self.op.drained == False
3188
3189     if (node.master_candidate and
3190         (self.op.master_candidate == False or offline_or_drain)):
3191       cp_size = self.cfg.GetClusterInfo().candidate_pool_size
3192       mc_now, mc_should, mc_max = self.cfg.GetMasterCandidateStats()
3193       if mc_now <= cp_size:
3194         msg = ("Not enough master candidates (desired"
3195                " %d, new value will be %d)" % (cp_size, mc_now-1))
3196         # Only allow forcing the operation if it's an offline/drain operation,
3197         # and we could not possibly promote more nodes.
3198         # FIXME: this can still lead to issues if in any way another node which
3199         # could be promoted appears in the meantime.
3200         if self.op.force and offline_or_drain and mc_should == mc_max:
3201           self.LogWarning(msg)
3202         else:
3203           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
3204
3205     if (self.op.master_candidate == True and
3206         ((node.offline and not self.op.offline == False) or
3207          (node.drained and not self.op.drained == False))):
3208       raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3209                                  " to master_candidate" % node.name,
3210                                  errors.ECODE_INVAL)
3211
3212     # If we're being deofflined/drained, we'll MC ourself if needed
3213     if (deoffline_or_drain and not offline_or_drain and not
3214         self.op.master_candidate == True and not node.master_candidate):
3215       self.op.master_candidate = _DecideSelfPromotion(self)
3216       if self.op.master_candidate:
3217         self.LogInfo("Autopromoting node to master candidate")
3218
3219     return
3220
3221   def Exec(self, feedback_fn):
3222     """Modifies a node.
3223
3224     """
3225     node = self.node
3226
3227     result = []
3228     changed_mc = False
3229
3230     if self.op.offline is not None:
3231       node.offline = self.op.offline
3232       result.append(("offline", str(self.op.offline)))
3233       if self.op.offline == True:
3234         if node.master_candidate:
3235           node.master_candidate = False
3236           changed_mc = True
3237           result.append(("master_candidate", "auto-demotion due to offline"))
3238         if node.drained:
3239           node.drained = False
3240           result.append(("drained", "clear drained status due to offline"))
3241
3242     if self.op.master_candidate is not None:
3243       node.master_candidate = self.op.master_candidate
3244       changed_mc = True
3245       result.append(("master_candidate", str(self.op.master_candidate)))
3246       if self.op.master_candidate == False:
3247         rrc = self.rpc.call_node_demote_from_mc(node.name)
3248         msg = rrc.fail_msg
3249         if msg:
3250           self.LogWarning("Node failed to demote itself: %s" % msg)
3251
3252     if self.op.drained is not None:
3253       node.drained = self.op.drained
3254       result.append(("drained", str(self.op.drained)))
3255       if self.op.drained == True:
3256         if node.master_candidate:
3257           node.master_candidate = False
3258           changed_mc = True
3259           result.append(("master_candidate", "auto-demotion due to drain"))
3260           rrc = self.rpc.call_node_demote_from_mc(node.name)
3261           msg = rrc.fail_msg
3262           if msg:
3263             self.LogWarning("Node failed to demote itself: %s" % msg)
3264         if node.offline:
3265           node.offline = False
3266           result.append(("offline", "clear offline status due to drain"))
3267
3268     # this will trigger configuration file update, if needed
3269     self.cfg.Update(node, feedback_fn)
3270     # this will trigger job queue propagation or cleanup
3271     if changed_mc:
3272       self.context.ReaddNode(node)
3273
3274     return result
3275
3276
3277 class LUPowercycleNode(NoHooksLU):
3278   """Powercycles a node.
3279
3280   """
3281   _OP_REQP = ["node_name", "force"]
3282   REQ_BGL = False
3283
3284   def CheckArguments(self):
3285     node_name = self.cfg.ExpandNodeName(self.op.node_name)
3286     if node_name is None:
3287       raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
3288                                  errors.ECODE_NOENT)
3289     self.op.node_name = node_name
3290     if node_name == self.cfg.GetMasterNode() and not self.op.force:
3291       raise errors.OpPrereqError("The node is the master and the force"
3292                                  " parameter was not set",
3293                                  errors.ECODE_INVAL)
3294
3295   def ExpandNames(self):
3296     """Locking for PowercycleNode.
3297
3298     This is a last-resort option and shouldn't block on other
3299     jobs. Therefore, we grab no locks.
3300
3301     """
3302     self.needed_locks = {}
3303
3304   def CheckPrereq(self):
3305     """Check prerequisites.
3306
3307     This LU has no prereqs.
3308
3309     """
3310     pass
3311
3312   def Exec(self, feedback_fn):
3313     """Reboots a node.
3314
3315     """
3316     result = self.rpc.call_node_powercycle(self.op.node_name,
3317                                            self.cfg.GetHypervisorType())
3318     result.Raise("Failed to schedule the reboot")
3319     return result.payload
3320
3321
3322 class LUQueryClusterInfo(NoHooksLU):
3323   """Query cluster configuration.
3324
3325   """
3326   _OP_REQP = []
3327   REQ_BGL = False
3328
3329   def ExpandNames(self):
3330     self.needed_locks = {}
3331
3332   def CheckPrereq(self):
3333     """No prerequsites needed for this LU.
3334
3335     """
3336     pass
3337
3338   def Exec(self, feedback_fn):
3339     """Return cluster config.
3340
3341     """
3342     cluster = self.cfg.GetClusterInfo()
3343     result = {
3344       "software_version": constants.RELEASE_VERSION,
3345       "protocol_version": constants.PROTOCOL_VERSION,
3346       "config_version": constants.CONFIG_VERSION,
3347       "os_api_version": max(constants.OS_API_VERSIONS),
3348       "export_version": constants.EXPORT_VERSION,
3349       "architecture": (platform.architecture()[0], platform.machine()),
3350       "name": cluster.cluster_name,
3351       "master": cluster.master_node,
3352       "default_hypervisor": cluster.enabled_hypervisors[0],
3353       "enabled_hypervisors": cluster.enabled_hypervisors,
3354       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3355                         for hypervisor_name in cluster.enabled_hypervisors]),
3356       "beparams": cluster.beparams,
3357       "nicparams": cluster.nicparams,
3358       "candidate_pool_size": cluster.candidate_pool_size,
3359       "master_netdev": cluster.master_netdev,
3360       "volume_group_name": cluster.volume_group_name,
3361       "file_storage_dir": cluster.file_storage_dir,
3362       "ctime": cluster.ctime,
3363       "mtime": cluster.mtime,
3364       "uuid": cluster.uuid,
3365       "tags": list(cluster.GetTags()),
3366       }
3367
3368     return result
3369
3370
3371 class LUQueryConfigValues(NoHooksLU):
3372   """Return configuration values.
3373
3374   """
3375   _OP_REQP = []
3376   REQ_BGL = False
3377   _FIELDS_DYNAMIC = utils.FieldSet()
3378   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3379                                   "watcher_pause")
3380
3381   def ExpandNames(self):
3382     self.needed_locks = {}
3383
3384     _CheckOutputFields(static=self._FIELDS_STATIC,
3385                        dynamic=self._FIELDS_DYNAMIC,
3386                        selected=self.op.output_fields)
3387
3388   def CheckPrereq(self):
3389     """No prerequisites.
3390
3391     """
3392     pass
3393
3394   def Exec(self, feedback_fn):
3395     """Dump a representation of the cluster config to the standard output.
3396
3397     """
3398     values = []
3399     for field in self.op.output_fields:
3400       if field == "cluster_name":
3401         entry = self.cfg.GetClusterName()
3402       elif field == "master_node":
3403         entry = self.cfg.GetMasterNode()
3404       elif field == "drain_flag":
3405         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3406       elif field == "watcher_pause":
3407         return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3408       else:
3409         raise errors.ParameterError(field)
3410       values.append(entry)
3411     return values
3412
3413
3414 class LUActivateInstanceDisks(NoHooksLU):
3415   """Bring up an instance's disks.
3416
3417   """
3418   _OP_REQP = ["instance_name"]
3419   REQ_BGL = False
3420
3421   def ExpandNames(self):
3422     self._ExpandAndLockInstance()
3423     self.needed_locks[locking.LEVEL_NODE] = []
3424     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3425
3426   def DeclareLocks(self, level):
3427     if level == locking.LEVEL_NODE:
3428       self._LockInstancesNodes()
3429
3430   def CheckPrereq(self):
3431     """Check prerequisites.
3432
3433     This checks that the instance is in the cluster.
3434
3435     """
3436     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3437     assert self.instance is not None, \
3438       "Cannot retrieve locked instance %s" % self.op.instance_name
3439     _CheckNodeOnline(self, self.instance.primary_node)
3440     if not hasattr(self.op, "ignore_size"):
3441       self.op.ignore_size = False
3442
3443   def Exec(self, feedback_fn):
3444     """Activate the disks.
3445
3446     """
3447     disks_ok, disks_info = \
3448               _AssembleInstanceDisks(self, self.instance,
3449                                      ignore_size=self.op.ignore_size)
3450     if not disks_ok:
3451       raise errors.OpExecError("Cannot activate block devices")
3452
3453     return disks_info
3454
3455
3456 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3457                            ignore_size=False):
3458   """Prepare the block devices for an instance.
3459
3460   This sets up the block devices on all nodes.
3461
3462   @type lu: L{LogicalUnit}
3463   @param lu: the logical unit on whose behalf we execute
3464   @type instance: L{objects.Instance}
3465   @param instance: the instance for whose disks we assemble
3466   @type ignore_secondaries: boolean
3467   @param ignore_secondaries: if true, errors on secondary nodes
3468       won't result in an error return from the function
3469   @type ignore_size: boolean
3470   @param ignore_size: if true, the current known size of the disk
3471       will not be used during the disk activation, useful for cases
3472       when the size is wrong
3473   @return: False if the operation failed, otherwise a list of
3474       (host, instance_visible_name, node_visible_name)
3475       with the mapping from node devices to instance devices
3476
3477   """
3478   device_info = []
3479   disks_ok = True
3480   iname = instance.name
3481   # With the two passes mechanism we try to reduce the window of
3482   # opportunity for the race condition of switching DRBD to primary
3483   # before handshaking occured, but we do not eliminate it
3484
3485   # The proper fix would be to wait (with some limits) until the
3486   # connection has been made and drbd transitions from WFConnection
3487   # into any other network-connected state (Connected, SyncTarget,
3488   # SyncSource, etc.)
3489
3490   # 1st pass, assemble on all nodes in secondary mode
3491   for inst_disk in instance.disks:
3492     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3493       if ignore_size:
3494         node_disk = node_disk.Copy()
3495         node_disk.UnsetSize()
3496       lu.cfg.SetDiskID(node_disk, node)
3497       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3498       msg = result.fail_msg
3499       if msg:
3500         lu.proc.LogWarning("Could not prepare block device %s on node %s"
3501                            " (is_primary=False, pass=1): %s",
3502                            inst_disk.iv_name, node, msg)
3503         if not ignore_secondaries:
3504           disks_ok = False
3505
3506   # FIXME: race condition on drbd migration to primary
3507
3508   # 2nd pass, do only the primary node
3509   for inst_disk in instance.disks:
3510     dev_path = None
3511
3512     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3513       if node != instance.primary_node:
3514         continue
3515       if ignore_size:
3516         node_disk = node_disk.Copy()
3517         node_disk.UnsetSize()
3518       lu.cfg.SetDiskID(node_disk, node)
3519       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3520       msg = result.fail_msg
3521       if msg:
3522         lu.proc.LogWarning("Could not prepare block device %s on node %s"
3523                            " (is_primary=True, pass=2): %s",
3524                            inst_disk.iv_name, node, msg)
3525         disks_ok = False
3526       else:
3527         dev_path = result.payload
3528
3529     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3530
3531   # leave the disks configured for the primary node
3532   # this is a workaround that would be fixed better by
3533   # improving the logical/physical id handling
3534   for disk in instance.disks:
3535     lu.cfg.SetDiskID(disk, instance.primary_node)
3536
3537   return disks_ok, device_info
3538
3539
3540 def _StartInstanceDisks(lu, instance, force):
3541   """Start the disks of an instance.
3542
3543   """
3544   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3545                                            ignore_secondaries=force)
3546   if not disks_ok:
3547     _ShutdownInstanceDisks(lu, instance)
3548     if force is not None and not force:
3549       lu.proc.LogWarning("", hint="If the message above refers to a"
3550                          " secondary node,"
3551                          " you can retry the operation using '--force'.")
3552     raise errors.OpExecError("Disk consistency error")
3553
3554
3555 class LUDeactivateInstanceDisks(NoHooksLU):
3556   """Shutdown an instance's disks.
3557
3558   """
3559   _OP_REQP = ["instance_name"]
3560   REQ_BGL = False
3561
3562   def ExpandNames(self):
3563     self._ExpandAndLockInstance()
3564     self.needed_locks[locking.LEVEL_NODE] = []
3565     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3566
3567   def DeclareLocks(self, level):
3568     if level == locking.LEVEL_NODE:
3569       self._LockInstancesNodes()
3570
3571   def CheckPrereq(self):
3572     """Check prerequisites.
3573
3574     This checks that the instance is in the cluster.
3575
3576     """
3577     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3578     assert self.instance is not None, \
3579       "Cannot retrieve locked instance %s" % self.op.instance_name
3580
3581   def Exec(self, feedback_fn):
3582     """Deactivate the disks
3583
3584     """
3585     instance = self.instance
3586     _SafeShutdownInstanceDisks(self, instance)
3587
3588
3589 def _SafeShutdownInstanceDisks(lu, instance):
3590   """Shutdown block devices of an instance.
3591
3592   This function checks if an instance is running, before calling
3593   _ShutdownInstanceDisks.
3594
3595   """
3596   pnode = instance.primary_node
3597   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
3598   ins_l.Raise("Can't contact node %s" % pnode)
3599
3600   if instance.name in ins_l.payload:
3601     raise errors.OpExecError("Instance is running, can't shutdown"
3602                              " block devices.")
3603
3604   _ShutdownInstanceDisks(lu, instance)
3605
3606
3607 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3608   """Shutdown block devices of an instance.
3609
3610   This does the shutdown on all nodes of the instance.
3611
3612   If the ignore_primary is false, errors on the primary node are
3613   ignored.
3614
3615   """
3616   all_result = True
3617   for disk in instance.disks:
3618     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3619       lu.cfg.SetDiskID(top_disk, node)
3620       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3621       msg = result.fail_msg
3622       if msg:
3623         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3624                       disk.iv_name, node, msg)
3625         if not ignore_primary or node != instance.primary_node:
3626           all_result = False
3627   return all_result
3628
3629
3630 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3631   """Checks if a node has enough free memory.
3632
3633   This function check if a given node has the needed amount of free
3634   memory. In case the node has less memory or we cannot get the
3635   information from the node, this function raise an OpPrereqError
3636   exception.
3637
3638   @type lu: C{LogicalUnit}
3639   @param lu: a logical unit from which we get configuration data
3640   @type node: C{str}
3641   @param node: the node to check
3642   @type reason: C{str}
3643   @param reason: string to use in the error message
3644   @type requested: C{int}
3645   @param requested: the amount of memory in MiB to check for
3646   @type hypervisor_name: C{str}
3647   @param hypervisor_name: the hypervisor to ask for memory stats
3648   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3649       we cannot check the node
3650
3651   """
3652   nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3653   nodeinfo[node].Raise("Can't get data from node %s" % node,
3654                        prereq=True, ecode=errors.ECODE_ENVIRON)
3655   free_mem = nodeinfo[node].payload.get('memory_free', None)
3656   if not isinstance(free_mem, int):
3657     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3658                                " was '%s'" % (node, free_mem),
3659                                errors.ECODE_ENVIRON)
3660   if requested > free_mem:
3661     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3662                                " needed %s MiB, available %s MiB" %
3663                                (node, reason, requested, free_mem),
3664                                errors.ECODE_NORES)
3665
3666
3667 class LUStartupInstance(LogicalUnit):
3668   """Starts an instance.
3669
3670   """
3671   HPATH = "instance-start"
3672   HTYPE = constants.HTYPE_INSTANCE
3673   _OP_REQP = ["instance_name", "force"]
3674   REQ_BGL = False
3675
3676   def ExpandNames(self):
3677     self._ExpandAndLockInstance()
3678
3679   def BuildHooksEnv(self):
3680     """Build hooks env.
3681
3682     This runs on master, primary and secondary nodes of the instance.
3683
3684     """
3685     env = {
3686       "FORCE": self.op.force,
3687       }
3688     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3689     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3690     return env, nl, nl
3691
3692   def CheckPrereq(self):
3693     """Check prerequisites.
3694
3695     This checks that the instance is in the cluster.
3696
3697     """
3698     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3699     assert self.instance is not None, \
3700       "Cannot retrieve locked instance %s" % self.op.instance_name
3701
3702     # extra beparams
3703     self.beparams = getattr(self.op, "beparams", {})
3704     if self.beparams:
3705       if not isinstance(self.beparams, dict):
3706         raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3707                                    " dict" % (type(self.beparams), ),
3708                                    errors.ECODE_INVAL)
3709       # fill the beparams dict
3710       utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3711       self.op.beparams = self.beparams
3712
3713     # extra hvparams
3714     self.hvparams = getattr(self.op, "hvparams", {})
3715     if self.hvparams:
3716       if not isinstance(self.hvparams, dict):
3717         raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3718                                    " dict" % (type(self.hvparams), ),
3719                                    errors.ECODE_INVAL)
3720
3721       # check hypervisor parameter syntax (locally)
3722       cluster = self.cfg.GetClusterInfo()
3723       utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3724       filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3725                                     instance.hvparams)
3726       filled_hvp.update(self.hvparams)
3727       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3728       hv_type.CheckParameterSyntax(filled_hvp)
3729       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3730       self.op.hvparams = self.hvparams
3731
3732     _CheckNodeOnline(self, instance.primary_node)
3733
3734     bep = self.cfg.GetClusterInfo().FillBE(instance)
3735     # check bridges existence
3736     _CheckInstanceBridgesExist(self, instance)
3737
3738     remote_info = self.rpc.call_instance_info(instance.primary_node,
3739                                               instance.name,
3740                                               instance.hypervisor)
3741     remote_info.Raise("Error checking node %s" % instance.primary_node,
3742                       prereq=True, ecode=errors.ECODE_ENVIRON)
3743     if not remote_info.payload: # not running already
3744       _CheckNodeFreeMemory(self, instance.primary_node,
3745                            "starting instance %s" % instance.name,
3746                            bep[constants.BE_MEMORY], instance.hypervisor)
3747
3748   def Exec(self, feedback_fn):
3749     """Start the instance.
3750
3751     """
3752     instance = self.instance
3753     force = self.op.force
3754
3755     self.cfg.MarkInstanceUp(instance.name)
3756
3757     node_current = instance.primary_node
3758
3759     _StartInstanceDisks(self, instance, force)
3760
3761     result = self.rpc.call_instance_start(node_current, instance,
3762                                           self.hvparams, self.beparams)
3763     msg = result.fail_msg
3764     if msg:
3765       _ShutdownInstanceDisks(self, instance)
3766       raise errors.OpExecError("Could not start instance: %s" % msg)
3767
3768
3769 class LURebootInstance(LogicalUnit):
3770   """Reboot an instance.
3771
3772   """
3773   HPATH = "instance-reboot"
3774   HTYPE = constants.HTYPE_INSTANCE
3775   _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3776   REQ_BGL = False
3777
3778   def CheckArguments(self):
3779     """Check the arguments.
3780
3781     """
3782     self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
3783                                     constants.DEFAULT_SHUTDOWN_TIMEOUT)
3784
3785   def ExpandNames(self):
3786     if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3787                                    constants.INSTANCE_REBOOT_HARD,
3788                                    constants.INSTANCE_REBOOT_FULL]:
3789       raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3790                                   (constants.INSTANCE_REBOOT_SOFT,
3791                                    constants.INSTANCE_REBOOT_HARD,
3792                                    constants.INSTANCE_REBOOT_FULL))
3793     self._ExpandAndLockInstance()
3794
3795   def BuildHooksEnv(self):
3796     """Build hooks env.
3797
3798     This runs on master, primary and secondary nodes of the instance.
3799
3800     """
3801     env = {
3802       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3803       "REBOOT_TYPE": self.op.reboot_type,
3804       "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
3805       }
3806     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3807     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3808     return env, nl, nl
3809
3810   def CheckPrereq(self):
3811     """Check prerequisites.
3812
3813     This checks that the instance is in the cluster.
3814
3815     """
3816     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3817     assert self.instance is not None, \
3818       "Cannot retrieve locked instance %s" % self.op.instance_name
3819
3820     _CheckNodeOnline(self, instance.primary_node)
3821
3822     # check bridges existence
3823     _CheckInstanceBridgesExist(self, instance)
3824
3825   def Exec(self, feedback_fn):
3826     """Reboot the instance.
3827
3828     """
3829     instance = self.instance
3830     ignore_secondaries = self.op.ignore_secondaries
3831     reboot_type = self.op.reboot_type
3832
3833     node_current = instance.primary_node
3834
3835     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3836                        constants.INSTANCE_REBOOT_HARD]:
3837       for disk in instance.disks:
3838         self.cfg.SetDiskID(disk, node_current)
3839       result = self.rpc.call_instance_reboot(node_current, instance,
3840                                              reboot_type,
3841                                              self.shutdown_timeout)
3842       result.Raise("Could not reboot instance")
3843     else:
3844       result = self.rpc.call_instance_shutdown(node_current, instance,
3845                                                self.shutdown_timeout)
3846       result.Raise("Could not shutdown instance for full reboot")
3847       _ShutdownInstanceDisks(self, instance)
3848       _StartInstanceDisks(self, instance, ignore_secondaries)
3849       result = self.rpc.call_instance_start(node_current, instance, None, None)
3850       msg = result.fail_msg
3851       if msg:
3852         _ShutdownInstanceDisks(self, instance)
3853         raise errors.OpExecError("Could not start instance for"
3854                                  " full reboot: %s" % msg)
3855
3856     self.cfg.MarkInstanceUp(instance.name)
3857
3858
3859 class LUShutdownInstance(LogicalUnit):
3860   """Shutdown an instance.
3861
3862   """
3863   HPATH = "instance-stop"
3864   HTYPE = constants.HTYPE_INSTANCE
3865   _OP_REQP = ["instance_name"]
3866   REQ_BGL = False
3867
3868   def CheckArguments(self):
3869     """Check the arguments.
3870
3871     """
3872     self.timeout = getattr(self.op, "timeout",
3873                            constants.DEFAULT_SHUTDOWN_TIMEOUT)
3874
3875   def ExpandNames(self):
3876     self._ExpandAndLockInstance()
3877
3878   def BuildHooksEnv(self):
3879     """Build hooks env.
3880
3881     This runs on master, primary and secondary nodes of the instance.
3882
3883     """
3884     env = _BuildInstanceHookEnvByObject(self, self.instance)
3885     env["TIMEOUT"] = self.timeout
3886     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3887     return env, nl, nl
3888
3889   def CheckPrereq(self):
3890     """Check prerequisites.
3891
3892     This checks that the instance is in the cluster.
3893
3894     """
3895     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3896     assert self.instance is not None, \
3897       "Cannot retrieve locked instance %s" % self.op.instance_name
3898     _CheckNodeOnline(self, self.instance.primary_node)
3899
3900   def Exec(self, feedback_fn):
3901     """Shutdown the instance.
3902
3903     """
3904     instance = self.instance
3905     node_current = instance.primary_node
3906     timeout = self.timeout
3907     self.cfg.MarkInstanceDown(instance.name)
3908     result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
3909     msg = result.fail_msg
3910     if msg:
3911       self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3912
3913     _ShutdownInstanceDisks(self, instance)
3914
3915
3916 class LUReinstallInstance(LogicalUnit):
3917   """Reinstall an instance.
3918
3919   """
3920   HPATH = "instance-reinstall"
3921   HTYPE = constants.HTYPE_INSTANCE
3922   _OP_REQP = ["instance_name"]
3923   REQ_BGL = False
3924
3925   def ExpandNames(self):
3926     self._ExpandAndLockInstance()
3927
3928   def BuildHooksEnv(self):
3929     """Build hooks env.
3930
3931     This runs on master, primary and secondary nodes of the instance.
3932
3933     """
3934     env = _BuildInstanceHookEnvByObject(self, self.instance)
3935     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3936     return env, nl, nl
3937
3938   def CheckPrereq(self):
3939     """Check prerequisites.
3940
3941     This checks that the instance is in the cluster and is not running.
3942
3943     """
3944     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3945     assert instance is not None, \
3946       "Cannot retrieve locked instance %s" % self.op.instance_name
3947     _CheckNodeOnline(self, instance.primary_node)
3948
3949     if instance.disk_template == constants.DT_DISKLESS:
3950       raise errors.OpPrereqError("Instance '%s' has no disks" %
3951                                  self.op.instance_name,
3952                                  errors.ECODE_INVAL)
3953     if instance.admin_up:
3954       raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3955                                  self.op.instance_name,
3956                                  errors.ECODE_STATE)
3957     remote_info = self.rpc.call_instance_info(instance.primary_node,
3958                                               instance.name,
3959                                               instance.hypervisor)
3960     remote_info.Raise("Error checking node %s" % instance.primary_node,
3961                       prereq=True, ecode=errors.ECODE_ENVIRON)
3962     if remote_info.payload:
3963       raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3964                                  (self.op.instance_name,
3965                                   instance.primary_node),
3966                                  errors.ECODE_STATE)
3967
3968     self.op.os_type = getattr(self.op, "os_type", None)
3969     self.op.force_variant = getattr(self.op, "force_variant", False)
3970     if self.op.os_type is not None:
3971       # OS verification
3972       pnode = self.cfg.GetNodeInfo(
3973         self.cfg.ExpandNodeName(instance.primary_node))
3974       if pnode is None:
3975         raise errors.OpPrereqError("Primary node '%s' is unknown" %
3976                                    self.op.pnode, errors.ECODE_NOENT)
3977       result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3978       result.Raise("OS '%s' not in supported OS list for primary node %s" %
3979                    (self.op.os_type, pnode.name),
3980                    prereq=True, ecode=errors.ECODE_INVAL)
3981       if not self.op.force_variant:
3982         _CheckOSVariant(result.payload, self.op.os_type)
3983
3984     self.instance = instance
3985
3986   def Exec(self, feedback_fn):
3987     """Reinstall the instance.
3988
3989     """
3990     inst = self.instance
3991
3992     if self.op.os_type is not None:
3993       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3994       inst.os = self.op.os_type
3995       self.cfg.Update(inst, feedback_fn)
3996
3997     _StartInstanceDisks(self, inst, None)
3998     try:
3999       feedback_fn("Running the instance OS create scripts...")
4000       # FIXME: pass debug option from opcode to backend
4001       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True, 0)
4002       result.Raise("Could not install OS for instance %s on node %s" %
4003                    (inst.name, inst.primary_node))
4004     finally:
4005       _ShutdownInstanceDisks(self, inst)
4006
4007
4008 class LURecreateInstanceDisks(LogicalUnit):
4009   """Recreate an instance's missing disks.
4010
4011   """
4012   HPATH = "instance-recreate-disks"
4013   HTYPE = constants.HTYPE_INSTANCE
4014   _OP_REQP = ["instance_name", "disks"]
4015   REQ_BGL = False
4016
4017   def CheckArguments(self):
4018     """Check the arguments.
4019
4020     """
4021     if not isinstance(self.op.disks, list):
4022       raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4023     for item in self.op.disks:
4024       if (not isinstance(item, int) or
4025           item < 0):
4026         raise errors.OpPrereqError("Invalid disk specification '%s'" %
4027                                    str(item), errors.ECODE_INVAL)
4028
4029   def ExpandNames(self):
4030     self._ExpandAndLockInstance()
4031
4032   def BuildHooksEnv(self):
4033     """Build hooks env.
4034
4035     This runs on master, primary and secondary nodes of the instance.
4036
4037     """
4038     env = _BuildInstanceHookEnvByObject(self, self.instance)
4039     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4040     return env, nl, nl
4041
4042   def CheckPrereq(self):
4043     """Check prerequisites.
4044
4045     This checks that the instance is in the cluster and is not running.
4046
4047     """
4048     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4049     assert instance is not None, \
4050       "Cannot retrieve locked instance %s" % self.op.instance_name
4051     _CheckNodeOnline(self, instance.primary_node)
4052
4053     if instance.disk_template == constants.DT_DISKLESS:
4054       raise errors.OpPrereqError("Instance '%s' has no disks" %
4055                                  self.op.instance_name, errors.ECODE_INVAL)
4056     if instance.admin_up:
4057       raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4058                                  self.op.instance_name, errors.ECODE_STATE)
4059     remote_info = self.rpc.call_instance_info(instance.primary_node,
4060                                               instance.name,
4061                                               instance.hypervisor)
4062     remote_info.Raise("Error checking node %s" % instance.primary_node,
4063                       prereq=True, ecode=errors.ECODE_ENVIRON)
4064     if remote_info.payload:
4065       raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4066                                  (self.op.instance_name,
4067                                   instance.primary_node), errors.ECODE_STATE)
4068
4069     if not self.op.disks:
4070       self.op.disks = range(len(instance.disks))
4071     else:
4072       for idx in self.op.disks:
4073         if idx >= len(instance.disks):
4074           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4075                                      errors.ECODE_INVAL)
4076
4077     self.instance = instance
4078
4079   def Exec(self, feedback_fn):
4080     """Recreate the disks.
4081
4082     """
4083     to_skip = []
4084     for idx, _ in enumerate(self.instance.disks):
4085       if idx not in self.op.disks: # disk idx has not been passed in
4086         to_skip.append(idx)
4087         continue
4088
4089     _CreateDisks(self, self.instance, to_skip=to_skip)
4090
4091
4092 class LURenameInstance(LogicalUnit):
4093   """Rename an instance.
4094
4095   """
4096   HPATH = "instance-rename"
4097   HTYPE = constants.HTYPE_INSTANCE
4098   _OP_REQP = ["instance_name", "new_name"]
4099
4100   def BuildHooksEnv(self):
4101     """Build hooks env.
4102
4103     This runs on master, primary and secondary nodes of the instance.
4104
4105     """
4106     env = _BuildInstanceHookEnvByObject(self, self.instance)
4107     env["INSTANCE_NEW_NAME"] = self.op.new_name
4108     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4109     return env, nl, nl
4110
4111   def CheckPrereq(self):
4112     """Check prerequisites.
4113
4114     This checks that the instance is in the cluster and is not running.
4115
4116     """
4117     instance = self.cfg.GetInstanceInfo(
4118       self.cfg.ExpandInstanceName(self.op.instance_name))
4119     if instance is None:
4120       raise errors.OpPrereqError("Instance '%s' not known" %
4121                                  self.op.instance_name, errors.ECODE_NOENT)
4122     _CheckNodeOnline(self, instance.primary_node)
4123
4124     if instance.admin_up:
4125       raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4126                                  self.op.instance_name, errors.ECODE_STATE)
4127     remote_info = self.rpc.call_instance_info(instance.primary_node,
4128                                               instance.name,
4129                                               instance.hypervisor)
4130     remote_info.Raise("Error checking node %s" % instance.primary_node,
4131                       prereq=True, ecode=errors.ECODE_ENVIRON)
4132     if remote_info.payload:
4133       raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4134                                  (self.op.instance_name,
4135                                   instance.primary_node), errors.ECODE_STATE)
4136     self.instance = instance
4137
4138     # new name verification
4139     name_info = utils.GetHostInfo(self.op.new_name)
4140
4141     self.op.new_name = new_name = name_info.name
4142     instance_list = self.cfg.GetInstanceList()
4143     if new_name in instance_list:
4144       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4145                                  new_name, errors.ECODE_EXISTS)
4146
4147     if not getattr(self.op, "ignore_ip", False):
4148       if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4149         raise errors.OpPrereqError("IP %s of instance %s already in use" %
4150                                    (name_info.ip, new_name),
4151                                    errors.ECODE_NOTUNIQUE)
4152
4153
4154   def Exec(self, feedback_fn):
4155     """Reinstall the instance.
4156
4157     """
4158     inst = self.instance
4159     old_name = inst.name
4160
4161     if inst.disk_template == constants.DT_FILE:
4162       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4163
4164     self.cfg.RenameInstance(inst.name, self.op.new_name)
4165     # Change the instance lock. This is definitely safe while we hold the BGL
4166     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4167     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4168
4169     # re-read the instance from the configuration after rename
4170     inst = self.cfg.GetInstanceInfo(self.op.new_name)
4171
4172     if inst.disk_template == constants.DT_FILE:
4173       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4174       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4175                                                      old_file_storage_dir,
4176                                                      new_file_storage_dir)
4177       result.Raise("Could not rename on node %s directory '%s' to '%s'"
4178                    " (but the instance has been renamed in Ganeti)" %
4179                    (inst.primary_node, old_file_storage_dir,
4180                     new_file_storage_dir))
4181
4182     _StartInstanceDisks(self, inst, None)
4183     try:
4184       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4185                                                  old_name, 0)
4186       msg = result.fail_msg
4187       if msg:
4188         msg = ("Could not run OS rename script for instance %s on node %s"
4189                " (but the instance has been renamed in Ganeti): %s" %
4190                (inst.name, inst.primary_node, msg))
4191         self.proc.LogWarning(msg)
4192     finally:
4193       _ShutdownInstanceDisks(self, inst)
4194
4195
4196 class LURemoveInstance(LogicalUnit):
4197   """Remove an instance.
4198
4199   """
4200   HPATH = "instance-remove"
4201   HTYPE = constants.HTYPE_INSTANCE
4202   _OP_REQP = ["instance_name", "ignore_failures"]
4203   REQ_BGL = False
4204
4205   def CheckArguments(self):
4206     """Check the arguments.
4207
4208     """
4209     self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4210                                     constants.DEFAULT_SHUTDOWN_TIMEOUT)
4211
4212   def ExpandNames(self):
4213     self._ExpandAndLockInstance()
4214     self.needed_locks[locking.LEVEL_NODE] = []
4215     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4216
4217   def DeclareLocks(self, level):
4218     if level == locking.LEVEL_NODE:
4219       self._LockInstancesNodes()
4220
4221   def BuildHooksEnv(self):
4222     """Build hooks env.
4223
4224     This runs on master, primary and secondary nodes of the instance.
4225
4226     """
4227     env = _BuildInstanceHookEnvByObject(self, self.instance)
4228     env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4229     nl = [self.cfg.GetMasterNode()]
4230     return env, nl, nl
4231
4232   def CheckPrereq(self):
4233     """Check prerequisites.
4234
4235     This checks that the instance is in the cluster.
4236
4237     """
4238     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4239     assert self.instance is not None, \
4240       "Cannot retrieve locked instance %s" % self.op.instance_name
4241
4242   def Exec(self, feedback_fn):
4243     """Remove the instance.
4244
4245     """
4246     instance = self.instance
4247     logging.info("Shutting down instance %s on node %s",
4248                  instance.name, instance.primary_node)
4249
4250     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4251                                              self.shutdown_timeout)
4252     msg = result.fail_msg
4253     if msg:
4254       if self.op.ignore_failures:
4255         feedback_fn("Warning: can't shutdown instance: %s" % msg)
4256       else:
4257         raise errors.OpExecError("Could not shutdown instance %s on"
4258                                  " node %s: %s" %
4259                                  (instance.name, instance.primary_node, msg))
4260
4261     logging.info("Removing block devices for instance %s", instance.name)
4262
4263     if not _RemoveDisks(self, instance):
4264       if self.op.ignore_failures:
4265         feedback_fn("Warning: can't remove instance's disks")
4266       else:
4267         raise errors.OpExecError("Can't remove instance's disks")
4268
4269     logging.info("Removing instance %s out of cluster config", instance.name)
4270
4271     self.cfg.RemoveInstance(instance.name)
4272     self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4273
4274
4275 class LUQueryInstances(NoHooksLU):
4276   """Logical unit for querying instances.
4277
4278   """
4279   # pylint: disable-msg=W0142
4280   _OP_REQP = ["output_fields", "names", "use_locking"]
4281   REQ_BGL = False
4282   _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4283                     "serial_no", "ctime", "mtime", "uuid"]
4284   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4285                                     "admin_state",
4286                                     "disk_template", "ip", "mac", "bridge",
4287                                     "nic_mode", "nic_link",
4288                                     "sda_size", "sdb_size", "vcpus", "tags",
4289                                     "network_port", "beparams",
4290                                     r"(disk)\.(size)/([0-9]+)",
4291                                     r"(disk)\.(sizes)", "disk_usage",
4292                                     r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4293                                     r"(nic)\.(bridge)/([0-9]+)",
4294                                     r"(nic)\.(macs|ips|modes|links|bridges)",
4295                                     r"(disk|nic)\.(count)",
4296                                     "hvparams",
4297                                     ] + _SIMPLE_FIELDS +
4298                                   ["hv/%s" % name
4299                                    for name in constants.HVS_PARAMETERS
4300                                    if name not in constants.HVC_GLOBALS] +
4301                                   ["be/%s" % name
4302                                    for name in constants.BES_PARAMETERS])
4303   _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4304
4305
4306   def ExpandNames(self):
4307     _CheckOutputFields(static=self._FIELDS_STATIC,
4308                        dynamic=self._FIELDS_DYNAMIC,
4309                        selected=self.op.output_fields)
4310
4311     self.needed_locks = {}
4312     self.share_locks[locking.LEVEL_INSTANCE] = 1
4313     self.share_locks[locking.LEVEL_NODE] = 1
4314
4315     if self.op.names:
4316       self.wanted = _GetWantedInstances(self, self.op.names)
4317     else:
4318       self.wanted = locking.ALL_SET
4319
4320     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4321     self.do_locking = self.do_node_query and self.op.use_locking
4322     if self.do_locking:
4323       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4324       self.needed_locks[locking.LEVEL_NODE] = []
4325       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4326
4327   def DeclareLocks(self, level):
4328     if level == locking.LEVEL_NODE and self.do_locking:
4329       self._LockInstancesNodes()
4330
4331   def CheckPrereq(self):
4332     """Check prerequisites.
4333
4334     """
4335     pass
4336
4337   def Exec(self, feedback_fn):
4338     """Computes the list of nodes and their attributes.
4339
4340     """
4341     # pylint: disable-msg=R0912
4342     # way too many branches here
4343     all_info = self.cfg.GetAllInstancesInfo()
4344     if self.wanted == locking.ALL_SET:
4345       # caller didn't specify instance names, so ordering is not important
4346       if self.do_locking:
4347         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4348       else:
4349         instance_names = all_info.keys()
4350       instance_names = utils.NiceSort(instance_names)
4351     else:
4352       # caller did specify names, so we must keep the ordering
4353       if self.do_locking:
4354         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4355       else:
4356         tgt_set = all_info.keys()
4357       missing = set(self.wanted).difference(tgt_set)
4358       if missing:
4359         raise errors.OpExecError("Some instances were removed before"
4360                                  " retrieving their data: %s" % missing)
4361       instance_names = self.wanted
4362
4363     instance_list = [all_info[iname] for iname in instance_names]
4364
4365     # begin data gathering
4366
4367     nodes = frozenset([inst.primary_node for inst in instance_list])
4368     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4369
4370     bad_nodes = []
4371     off_nodes = []
4372     if self.do_node_query:
4373       live_data = {}
4374       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4375       for name in nodes:
4376         result = node_data[name]
4377         if result.offline:
4378           # offline nodes will be in both lists
4379           off_nodes.append(name)
4380         if result.fail_msg:
4381           bad_nodes.append(name)
4382         else:
4383           if result.payload:
4384             live_data.update(result.payload)
4385           # else no instance is alive
4386     else:
4387       live_data = dict([(name, {}) for name in instance_names])
4388
4389     # end data gathering
4390
4391     HVPREFIX = "hv/"
4392     BEPREFIX = "be/"
4393     output = []
4394     cluster = self.cfg.GetClusterInfo()
4395     for instance in instance_list:
4396       iout = []
4397       i_hv = cluster.FillHV(instance, skip_globals=True)
4398       i_be = cluster.FillBE(instance)
4399       i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4400                                  nic.nicparams) for nic in instance.nics]
4401       for field in self.op.output_fields:
4402         st_match = self._FIELDS_STATIC.Matches(field)
4403         if field in self._SIMPLE_FIELDS:
4404           val = getattr(instance, field)
4405         elif field == "pnode":
4406           val = instance.primary_node
4407         elif field == "snodes":
4408           val = list(instance.secondary_nodes)
4409         elif field == "admin_state":
4410           val = instance.admin_up
4411         elif field == "oper_state":
4412           if instance.primary_node in bad_nodes:
4413             val = None
4414           else:
4415             val = bool(live_data.get(instance.name))
4416         elif field == "status":
4417           if instance.primary_node in off_nodes:
4418             val = "ERROR_nodeoffline"
4419           elif instance.primary_node in bad_nodes:
4420             val = "ERROR_nodedown"
4421           else:
4422             running = bool(live_data.get(instance.name))
4423             if running:
4424               if instance.admin_up:
4425                 val = "running"
4426               else:
4427                 val = "ERROR_up"
4428             else:
4429               if instance.admin_up:
4430                 val = "ERROR_down"
4431               else:
4432                 val = "ADMIN_down"
4433         elif field == "oper_ram":
4434           if instance.primary_node in bad_nodes:
4435             val = None
4436           elif instance.name in live_data:
4437             val = live_data[instance.name].get("memory", "?")
4438           else:
4439             val = "-"
4440         elif field == "vcpus":
4441           val = i_be[constants.BE_VCPUS]
4442         elif field == "disk_template":
4443           val = instance.disk_template
4444         elif field == "ip":
4445           if instance.nics:
4446             val = instance.nics[0].ip
4447           else:
4448             val = None
4449         elif field == "nic_mode":
4450           if instance.nics:
4451             val = i_nicp[0][constants.NIC_MODE]
4452           else:
4453             val = None
4454         elif field == "nic_link":
4455           if instance.nics:
4456             val = i_nicp[0][constants.NIC_LINK]
4457           else:
4458             val = None
4459         elif field == "bridge":
4460           if (instance.nics and
4461               i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4462             val = i_nicp[0][constants.NIC_LINK]
4463           else:
4464             val = None
4465         elif field == "mac":
4466           if instance.nics:
4467             val = instance.nics[0].mac
4468           else:
4469             val = None
4470         elif field == "sda_size" or field == "sdb_size":
4471           idx = ord(field[2]) - ord('a')
4472           try:
4473             val = instance.FindDisk(idx).size
4474           except errors.OpPrereqError:
4475             val = None
4476         elif field == "disk_usage": # total disk usage per node
4477           disk_sizes = [{'size': disk.size} for disk in instance.disks]
4478           val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4479         elif field == "tags":
4480           val = list(instance.GetTags())
4481         elif field == "hvparams":
4482           val = i_hv
4483         elif (field.startswith(HVPREFIX) and
4484               field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4485               field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4486           val = i_hv.get(field[len(HVPREFIX):], None)
4487         elif field == "beparams":
4488           val = i_be
4489         elif (field.startswith(BEPREFIX) and
4490               field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4491           val = i_be.get(field[len(BEPREFIX):], None)
4492         elif st_match and st_match.groups():
4493           # matches a variable list
4494           st_groups = st_match.groups()
4495           if st_groups and st_groups[0] == "disk":
4496             if st_groups[1] == "count":
4497               val = len(instance.disks)
4498             elif st_groups[1] == "sizes":
4499               val = [disk.size for disk in instance.disks]
4500             elif st_groups[1] == "size":
4501               try:
4502                 val = instance.FindDisk(st_groups[2]).size
4503               except errors.OpPrereqError:
4504                 val = None
4505             else:
4506               assert False, "Unhandled disk parameter"
4507           elif st_groups[0] == "nic":
4508             if st_groups[1] == "count":
4509               val = len(instance.nics)
4510             elif st_groups[1] == "macs":
4511               val = [nic.mac for nic in instance.nics]
4512             elif st_groups[1] == "ips":
4513               val = [nic.ip for nic in instance.nics]
4514             elif st_groups[1] == "modes":
4515               val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4516             elif st_groups[1] == "links":
4517               val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4518             elif st_groups[1] == "bridges":
4519               val = []
4520               for nicp in i_nicp:
4521                 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4522                   val.append(nicp[constants.NIC_LINK])
4523                 else:
4524                   val.append(None)
4525             else:
4526               # index-based item
4527               nic_idx = int(st_groups[2])
4528               if nic_idx >= len(instance.nics):
4529                 val = None
4530               else:
4531                 if st_groups[1] == "mac":
4532                   val = instance.nics[nic_idx].mac
4533                 elif st_groups[1] == "ip":
4534                   val = instance.nics[nic_idx].ip
4535                 elif st_groups[1] == "mode":
4536                   val = i_nicp[nic_idx][constants.NIC_MODE]
4537                 elif st_groups[1] == "link":
4538                   val = i_nicp[nic_idx][constants.NIC_LINK]
4539                 elif st_groups[1] == "bridge":
4540                   nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4541                   if nic_mode == constants.NIC_MODE_BRIDGED:
4542                     val = i_nicp[nic_idx][constants.NIC_LINK]
4543                   else:
4544                     val = None
4545                 else:
4546                   assert False, "Unhandled NIC parameter"
4547           else:
4548             assert False, ("Declared but unhandled variable parameter '%s'" %
4549                            field)
4550         else:
4551           assert False, "Declared but unhandled parameter '%s'" % field
4552         iout.append(val)
4553       output.append(iout)
4554
4555     return output
4556
4557
4558 class LUFailoverInstance(LogicalUnit):
4559   """Failover an instance.
4560
4561   """
4562   HPATH = "instance-failover"
4563   HTYPE = constants.HTYPE_INSTANCE
4564   _OP_REQP = ["instance_name", "ignore_consistency"]
4565   REQ_BGL = False
4566
4567   def CheckArguments(self):
4568     """Check the arguments.
4569
4570     """
4571     self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4572                                     constants.DEFAULT_SHUTDOWN_TIMEOUT)
4573
4574   def ExpandNames(self):
4575     self._ExpandAndLockInstance()
4576     self.needed_locks[locking.LEVEL_NODE] = []
4577     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4578
4579   def DeclareLocks(self, level):
4580     if level == locking.LEVEL_NODE:
4581       self._LockInstancesNodes()
4582
4583   def BuildHooksEnv(self):
4584     """Build hooks env.
4585
4586     This runs on master, primary and secondary nodes of the instance.
4587
4588     """
4589     env = {
4590       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4591       "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4592       }
4593     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4594     nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
4595     return env, nl, nl
4596
4597   def CheckPrereq(self):
4598     """Check prerequisites.
4599
4600     This checks that the instance is in the cluster.
4601
4602     """
4603     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4604     assert self.instance is not None, \
4605       "Cannot retrieve locked instance %s" % self.op.instance_name
4606
4607     bep = self.cfg.GetClusterInfo().FillBE(instance)
4608     if instance.disk_template not in constants.DTS_NET_MIRROR:
4609       raise errors.OpPrereqError("Instance's disk layout is not"
4610                                  " network mirrored, cannot failover.",
4611                                  errors.ECODE_STATE)
4612
4613     secondary_nodes = instance.secondary_nodes
4614     if not secondary_nodes:
4615       raise errors.ProgrammerError("no secondary node but using "
4616                                    "a mirrored disk template")
4617
4618     target_node = secondary_nodes[0]
4619     _CheckNodeOnline(self, target_node)
4620     _CheckNodeNotDrained(self, target_node)
4621     if instance.admin_up:
4622       # check memory requirements on the secondary node
4623       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4624                            instance.name, bep[constants.BE_MEMORY],
4625                            instance.hypervisor)
4626     else:
4627       self.LogInfo("Not checking memory on the secondary node as"
4628                    " instance will not be started")
4629
4630     # check bridge existance
4631     _CheckInstanceBridgesExist(self, instance, node=target_node)
4632
4633   def Exec(self, feedback_fn):
4634     """Failover an instance.
4635
4636     The failover is done by shutting it down on its present node and
4637     starting it on the secondary.
4638
4639     """
4640     instance = self.instance
4641
4642     source_node = instance.primary_node
4643     target_node = instance.secondary_nodes[0]
4644
4645     if instance.admin_up:
4646       feedback_fn("* checking disk consistency between source and target")
4647       for dev in instance.disks:
4648         # for drbd, these are drbd over lvm
4649         if not _CheckDiskConsistency(self, dev, target_node, False):
4650           if not self.op.ignore_consistency:
4651             raise errors.OpExecError("Disk %s is degraded on target node,"
4652                                      " aborting failover." % dev.iv_name)
4653     else:
4654       feedback_fn("* not checking disk consistency as instance is not running")
4655
4656     feedback_fn("* shutting down instance on source node")
4657     logging.info("Shutting down instance %s on node %s",
4658                  instance.name, source_node)
4659
4660     result = self.rpc.call_instance_shutdown(source_node, instance,
4661                                              self.shutdown_timeout)
4662     msg = result.fail_msg
4663     if msg:
4664       if self.op.ignore_consistency:
4665         self.proc.LogWarning("Could not shutdown instance %s on node %s."
4666                              " Proceeding anyway. Please make sure node"
4667                              " %s is down. Error details: %s",
4668                              instance.name, source_node, source_node, msg)
4669       else:
4670         raise errors.OpExecError("Could not shutdown instance %s on"
4671                                  " node %s: %s" %
4672                                  (instance.name, source_node, msg))
4673
4674     feedback_fn("* deactivating the instance's disks on source node")
4675     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4676       raise errors.OpExecError("Can't shut down the instance's disks.")
4677
4678     instance.primary_node = target_node
4679     # distribute new instance config to the other nodes
4680     self.cfg.Update(instance, feedback_fn)
4681
4682     # Only start the instance if it's marked as up
4683     if instance.admin_up:
4684       feedback_fn("* activating the instance's disks on target node")
4685       logging.info("Starting instance %s on node %s",
4686                    instance.name, target_node)
4687
4688       disks_ok, _ = _AssembleInstanceDisks(self, instance,
4689                                                ignore_secondaries=True)
4690       if not disks_ok:
4691         _ShutdownInstanceDisks(self, instance)
4692         raise errors.OpExecError("Can't activate the instance's disks")
4693
4694       feedback_fn("* starting the instance on the target node")
4695       result = self.rpc.call_instance_start(target_node, instance, None, None)
4696       msg = result.fail_msg
4697       if msg:
4698         _ShutdownInstanceDisks(self, instance)
4699         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4700                                  (instance.name, target_node, msg))
4701
4702
4703 class LUMigrateInstance(LogicalUnit):
4704   """Migrate an instance.
4705
4706   This is migration without shutting down, compared to the failover,
4707   which is done with shutdown.
4708
4709   """
4710   HPATH = "instance-migrate"
4711   HTYPE = constants.HTYPE_INSTANCE
4712   _OP_REQP = ["instance_name", "live", "cleanup"]
4713
4714   REQ_BGL = False
4715
4716   def ExpandNames(self):
4717     self._ExpandAndLockInstance()
4718
4719     self.needed_locks[locking.LEVEL_NODE] = []
4720     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4721
4722     self._migrater = TLMigrateInstance(self, self.op.instance_name,
4723                                        self.op.live, self.op.cleanup)
4724     self.tasklets = [self._migrater]
4725
4726   def DeclareLocks(self, level):
4727     if level == locking.LEVEL_NODE:
4728       self._LockInstancesNodes()
4729
4730   def BuildHooksEnv(self):
4731     """Build hooks env.
4732
4733     This runs on master, primary and secondary nodes of the instance.
4734
4735     """
4736     instance = self._migrater.instance
4737     env = _BuildInstanceHookEnvByObject(self, instance)
4738     env["MIGRATE_LIVE"] = self.op.live
4739     env["MIGRATE_CLEANUP"] = self.op.cleanup
4740     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4741     return env, nl, nl
4742
4743
4744 class LUMoveInstance(LogicalUnit):
4745   """Move an instance by data-copying.
4746
4747   """
4748   HPATH = "instance-move"
4749   HTYPE = constants.HTYPE_INSTANCE
4750   _OP_REQP = ["instance_name", "target_node"]
4751   REQ_BGL = False
4752
4753   def CheckArguments(self):
4754     """Check the arguments.
4755
4756     """
4757     self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4758                                     constants.DEFAULT_SHUTDOWN_TIMEOUT)
4759
4760   def ExpandNames(self):
4761     self._ExpandAndLockInstance()
4762     target_node = self.cfg.ExpandNodeName(self.op.target_node)
4763     if target_node is None:
4764       raise errors.OpPrereqError("Node '%s' not known" %
4765                                   self.op.target_node, errors.ECODE_NOENT)
4766     self.op.target_node = target_node
4767     self.needed_locks[locking.LEVEL_NODE] = [target_node]
4768     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4769
4770   def DeclareLocks(self, level):
4771     if level == locking.LEVEL_NODE:
4772       self._LockInstancesNodes(primary_only=True)
4773
4774   def BuildHooksEnv(self):
4775     """Build hooks env.
4776
4777     This runs on master, primary and secondary nodes of the instance.
4778
4779     """
4780     env = {
4781       "TARGET_NODE": self.op.target_node,
4782       "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4783       }
4784     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4785     nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4786                                        self.op.target_node]
4787     return env, nl, nl
4788
4789   def CheckPrereq(self):
4790     """Check prerequisites.
4791
4792     This checks that the instance is in the cluster.
4793
4794     """
4795     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4796     assert self.instance is not None, \
4797       "Cannot retrieve locked instance %s" % self.op.instance_name
4798
4799     node = self.cfg.GetNodeInfo(self.op.target_node)
4800     assert node is not None, \
4801       "Cannot retrieve locked node %s" % self.op.target_node
4802
4803     self.target_node = target_node = node.name
4804
4805     if target_node == instance.primary_node:
4806       raise errors.OpPrereqError("Instance %s is already on the node %s" %
4807                                  (instance.name, target_node),
4808                                  errors.ECODE_STATE)
4809
4810     bep = self.cfg.GetClusterInfo().FillBE(instance)
4811
4812     for idx, dsk in enumerate(instance.disks):
4813       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
4814         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
4815                                    " cannot copy" % idx, errors.ECODE_STATE)
4816
4817     _CheckNodeOnline(self, target_node)
4818     _CheckNodeNotDrained(self, target_node)
4819
4820     if instance.admin_up:
4821       # check memory requirements on the secondary node
4822       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4823                            instance.name, bep[constants.BE_MEMORY],
4824                            instance.hypervisor)
4825     else:
4826       self.LogInfo("Not checking memory on the secondary node as"
4827                    " instance will not be started")
4828
4829     # check bridge existance
4830     _CheckInstanceBridgesExist(self, instance, node=target_node)
4831
4832   def Exec(self, feedback_fn):
4833     """Move an instance.
4834
4835     The move is done by shutting it down on its present node, copying
4836     the data over (slow) and starting it on the new node.
4837
4838     """
4839     instance = self.instance
4840
4841     source_node = instance.primary_node
4842     target_node = self.target_node
4843
4844     self.LogInfo("Shutting down instance %s on source node %s",
4845                  instance.name, source_node)
4846
4847     result = self.rpc.call_instance_shutdown(source_node, instance,
4848                                              self.shutdown_timeout)
4849     msg = result.fail_msg
4850     if msg:
4851       if self.op.ignore_consistency:
4852         self.proc.LogWarning("Could not shutdown instance %s on node %s."
4853                              " Proceeding anyway. Please make sure node"
4854                              " %s is down. Error details: %s",
4855                              instance.name, source_node, source_node, msg)
4856       else:
4857         raise errors.OpExecError("Could not shutdown instance %s on"
4858                                  " node %s: %s" %
4859                                  (instance.name, source_node, msg))
4860
4861     # create the target disks
4862     try:
4863       _CreateDisks(self, instance, target_node=target_node)
4864     except errors.OpExecError:
4865       self.LogWarning("Device creation failed, reverting...")
4866       try:
4867         _RemoveDisks(self, instance, target_node=target_node)
4868       finally:
4869         self.cfg.ReleaseDRBDMinors(instance.name)
4870         raise
4871
4872     cluster_name = self.cfg.GetClusterInfo().cluster_name
4873
4874     errs = []
4875     # activate, get path, copy the data over
4876     for idx, disk in enumerate(instance.disks):
4877       self.LogInfo("Copying data for disk %d", idx)
4878       result = self.rpc.call_blockdev_assemble(target_node, disk,
4879                                                instance.name, True)
4880       if result.fail_msg:
4881         self.LogWarning("Can't assemble newly created disk %d: %s",
4882                         idx, result.fail_msg)
4883         errs.append(result.fail_msg)
4884         break
4885       dev_path = result.payload
4886       result = self.rpc.call_blockdev_export(source_node, disk,
4887                                              target_node, dev_path,
4888                                              cluster_name)
4889       if result.fail_msg:
4890         self.LogWarning("Can't copy data over for disk %d: %s",
4891                         idx, result.fail_msg)
4892         errs.append(result.fail_msg)
4893         break
4894
4895     if errs:
4896       self.LogWarning("Some disks failed to copy, aborting")
4897       try:
4898         _RemoveDisks(self, instance, target_node=target_node)
4899       finally:
4900         self.cfg.ReleaseDRBDMinors(instance.name)
4901         raise errors.OpExecError("Errors during disk copy: %s" %
4902                                  (",".join(errs),))
4903
4904     instance.primary_node = target_node
4905     self.cfg.Update(instance, feedback_fn)
4906
4907     self.LogInfo("Removing the disks on the original node")
4908     _RemoveDisks(self, instance, target_node=source_node)
4909
4910     # Only start the instance if it's marked as up
4911     if instance.admin_up:
4912       self.LogInfo("Starting instance %s on node %s",
4913                    instance.name, target_node)
4914
4915       disks_ok, _ = _AssembleInstanceDisks(self, instance,
4916                                            ignore_secondaries=True)
4917       if not disks_ok:
4918         _ShutdownInstanceDisks(self, instance)
4919         raise errors.OpExecError("Can't activate the instance's disks")
4920
4921       result = self.rpc.call_instance_start(target_node, instance, None, None)
4922       msg = result.fail_msg
4923       if msg:
4924         _ShutdownInstanceDisks(self, instance)
4925         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4926                                  (instance.name, target_node, msg))
4927
4928
4929 class LUMigrateNode(LogicalUnit):
4930   """Migrate all instances from a node.
4931
4932   """
4933   HPATH = "node-migrate"
4934   HTYPE = constants.HTYPE_NODE
4935   _OP_REQP = ["node_name", "live"]
4936   REQ_BGL = False
4937
4938   def ExpandNames(self):
4939     self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
4940     if self.op.node_name is None:
4941       raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name,
4942                                  errors.ECODE_NOENT)
4943
4944     self.needed_locks = {
4945       locking.LEVEL_NODE: [self.op.node_name],
4946       }
4947
4948     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4949
4950     # Create tasklets for migrating instances for all instances on this node
4951     names = []
4952     tasklets = []
4953
4954     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
4955       logging.debug("Migrating instance %s", inst.name)
4956       names.append(inst.name)
4957
4958       tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
4959
4960     self.tasklets = tasklets
4961
4962     # Declare instance locks
4963     self.needed_locks[locking.LEVEL_INSTANCE] = names
4964
4965   def DeclareLocks(self, level):
4966     if level == locking.LEVEL_NODE:
4967       self._LockInstancesNodes()
4968
4969   def BuildHooksEnv(self):
4970     """Build hooks env.
4971
4972     This runs on the master, the primary and all the secondaries.
4973
4974     """
4975     env = {
4976       "NODE_NAME": self.op.node_name,
4977       }
4978
4979     nl = [self.cfg.GetMasterNode()]
4980
4981     return (env, nl, nl)
4982
4983
4984 class TLMigrateInstance(Tasklet):
4985   def __init__(self, lu, instance_name, live, cleanup):
4986     """Initializes this class.
4987
4988     """
4989     Tasklet.__init__(self, lu)
4990
4991     # Parameters
4992     self.instance_name = instance_name
4993     self.live = live
4994     self.cleanup = cleanup
4995
4996   def CheckPrereq(self):
4997     """Check prerequisites.
4998
4999     This checks that the instance is in the cluster.
5000
5001     """
5002     instance = self.cfg.GetInstanceInfo(
5003       self.cfg.ExpandInstanceName(self.instance_name))
5004     if instance is None:
5005       raise errors.OpPrereqError("Instance '%s' not known" %
5006                                  self.instance_name, errors.ECODE_NOENT)
5007
5008     if instance.disk_template != constants.DT_DRBD8:
5009       raise errors.OpPrereqError("Instance's disk layout is not"
5010                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
5011
5012     secondary_nodes = instance.secondary_nodes
5013     if not secondary_nodes:
5014       raise errors.ConfigurationError("No secondary node but using"
5015                                       " drbd8 disk template")
5016
5017     i_be = self.cfg.GetClusterInfo().FillBE(instance)
5018
5019     target_node = secondary_nodes[0]
5020     # check memory requirements on the secondary node
5021     _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5022                          instance.name, i_be[constants.BE_MEMORY],
5023                          instance.hypervisor)
5024
5025     # check bridge existance
5026     _CheckInstanceBridgesExist(self, instance, node=target_node)
5027
5028     if not self.cleanup:
5029       _CheckNodeNotDrained(self, target_node)
5030       result = self.rpc.call_instance_migratable(instance.primary_node,
5031                                                  instance)
5032       result.Raise("Can't migrate, please use failover",
5033                    prereq=True, ecode=errors.ECODE_STATE)
5034
5035     self.instance = instance
5036
5037   def _WaitUntilSync(self):
5038     """Poll with custom rpc for disk sync.
5039
5040     This uses our own step-based rpc call.
5041
5042     """
5043     self.feedback_fn("* wait until resync is done")
5044     all_done = False
5045     while not all_done:
5046       all_done = True
5047       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5048                                             self.nodes_ip,
5049                                             self.instance.disks)
5050       min_percent = 100
5051       for node, nres in result.items():
5052         nres.Raise("Cannot resync disks on node %s" % node)
5053         node_done, node_percent = nres.payload
5054         all_done = all_done and node_done
5055         if node_percent is not None:
5056           min_percent = min(min_percent, node_percent)
5057       if not all_done:
5058         if min_percent < 100:
5059           self.feedback_fn("   - progress: %.1f%%" % min_percent)
5060         time.sleep(2)
5061
5062   def _EnsureSecondary(self, node):
5063     """Demote a node to secondary.
5064
5065     """
5066     self.feedback_fn("* switching node %s to secondary mode" % node)
5067
5068     for dev in self.instance.disks:
5069       self.cfg.SetDiskID(dev, node)
5070
5071     result = self.rpc.call_blockdev_close(node, self.instance.name,
5072                                           self.instance.disks)
5073     result.Raise("Cannot change disk to secondary on node %s" % node)
5074
5075   def _GoStandalone(self):
5076     """Disconnect from the network.
5077
5078     """
5079     self.feedback_fn("* changing into standalone mode")
5080     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5081                                                self.instance.disks)
5082     for node, nres in result.items():
5083       nres.Raise("Cannot disconnect disks node %s" % node)
5084
5085   def _GoReconnect(self, multimaster):
5086     """Reconnect to the network.
5087
5088     """
5089     if multimaster:
5090       msg = "dual-master"
5091     else:
5092       msg = "single-master"
5093     self.feedback_fn("* changing disks into %s mode" % msg)
5094     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5095                                            self.instance.disks,
5096                                            self.instance.name, multimaster)
5097     for node, nres in result.items():
5098       nres.Raise("Cannot change disks config on node %s" % node)
5099
5100   def _ExecCleanup(self):
5101     """Try to cleanup after a failed migration.
5102
5103     The cleanup is done by:
5104       - check that the instance is running only on one node
5105         (and update the config if needed)
5106       - change disks on its secondary node to secondary
5107       - wait until disks are fully synchronized
5108       - disconnect from the network
5109       - change disks into single-master mode
5110       - wait again until disks are fully synchronized
5111
5112     """
5113     instance = self.instance
5114     target_node = self.target_node
5115     source_node = self.source_node
5116
5117     # check running on only one node
5118     self.feedback_fn("* checking where the instance actually runs"
5119                      " (if this hangs, the hypervisor might be in"
5120                      " a bad state)")
5121     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5122     for node, result in ins_l.items():
5123       result.Raise("Can't contact node %s" % node)
5124
5125     runningon_source = instance.name in ins_l[source_node].payload
5126     runningon_target = instance.name in ins_l[target_node].payload
5127
5128     if runningon_source and runningon_target:
5129       raise errors.OpExecError("Instance seems to be running on two nodes,"
5130                                " or the hypervisor is confused. You will have"
5131                                " to ensure manually that it runs only on one"
5132                                " and restart this operation.")
5133
5134     if not (runningon_source or runningon_target):
5135       raise errors.OpExecError("Instance does not seem to be running at all."
5136                                " In this case, it's safer to repair by"
5137                                " running 'gnt-instance stop' to ensure disk"
5138                                " shutdown, and then restarting it.")
5139
5140     if runningon_target:
5141       # the migration has actually succeeded, we need to update the config
5142       self.feedback_fn("* instance running on secondary node (%s),"
5143                        " updating config" % target_node)
5144       instance.primary_node = target_node
5145       self.cfg.Update(instance, self.feedback_fn)
5146       demoted_node = source_node
5147     else:
5148       self.feedback_fn("* instance confirmed to be running on its"
5149                        " primary node (%s)" % source_node)
5150       demoted_node = target_node
5151
5152     self._EnsureSecondary(demoted_node)
5153     try:
5154       self._WaitUntilSync()
5155     except errors.OpExecError:
5156       # we ignore here errors, since if the device is standalone, it
5157       # won't be able to sync
5158       pass
5159     self._GoStandalone()
5160     self._GoReconnect(False)
5161     self._WaitUntilSync()
5162
5163     self.feedback_fn("* done")
5164
5165   def _RevertDiskStatus(self):
5166     """Try to revert the disk status after a failed migration.
5167
5168     """
5169     target_node = self.target_node
5170     try:
5171       self._EnsureSecondary(target_node)
5172       self._GoStandalone()
5173       self._GoReconnect(False)
5174       self._WaitUntilSync()
5175     except errors.OpExecError, err:
5176       self.lu.LogWarning("Migration failed and I can't reconnect the"
5177                          " drives: error '%s'\n"
5178                          "Please look and recover the instance status" %
5179                          str(err))
5180
5181   def _AbortMigration(self):
5182     """Call the hypervisor code to abort a started migration.
5183
5184     """
5185     instance = self.instance
5186     target_node = self.target_node
5187     migration_info = self.migration_info
5188
5189     abort_result = self.rpc.call_finalize_migration(target_node,
5190                                                     instance,
5191                                                     migration_info,
5192                                                     False)
5193     abort_msg = abort_result.fail_msg
5194     if abort_msg:
5195       logging.error("Aborting migration failed on target node %s: %s",
5196                     target_node, abort_msg)
5197       # Don't raise an exception here, as we stil have to try to revert the
5198       # disk status, even if this step failed.
5199
5200   def _ExecMigration(self):
5201     """Migrate an instance.
5202
5203     The migrate is done by:
5204       - change the disks into dual-master mode
5205       - wait until disks are fully synchronized again
5206       - migrate the instance
5207       - change disks on the new secondary node (the old primary) to secondary
5208       - wait until disks are fully synchronized
5209       - change disks into single-master mode
5210
5211     """
5212     instance = self.instance
5213     target_node = self.target_node
5214     source_node = self.source_node
5215
5216     self.feedback_fn("* checking disk consistency between source and target")
5217     for dev in instance.disks:
5218       if not _CheckDiskConsistency(self, dev, target_node, False):
5219         raise errors.OpExecError("Disk %s is degraded or not fully"
5220                                  " synchronized on target node,"
5221                                  " aborting migrate." % dev.iv_name)
5222
5223     # First get the migration information from the remote node
5224     result = self.rpc.call_migration_info(source_node, instance)
5225     msg = result.fail_msg
5226     if msg:
5227       log_err = ("Failed fetching source migration information from %s: %s" %
5228                  (source_node, msg))
5229       logging.error(log_err)
5230       raise errors.OpExecError(log_err)
5231
5232     self.migration_info = migration_info = result.payload
5233
5234     # Then switch the disks to master/master mode
5235     self._EnsureSecondary(target_node)
5236     self._GoStandalone()
5237     self._GoReconnect(True)
5238     self._WaitUntilSync()
5239
5240     self.feedback_fn("* preparing %s to accept the instance" % target_node)
5241     result = self.rpc.call_accept_instance(target_node,
5242                                            instance,
5243                                            migration_info,
5244                                            self.nodes_ip[target_node])
5245
5246     msg = result.fail_msg
5247     if msg:
5248       logging.error("Instance pre-migration failed, trying to revert"
5249                     " disk status: %s", msg)
5250       self.feedback_fn("Pre-migration failed, aborting")
5251       self._AbortMigration()
5252       self._RevertDiskStatus()
5253       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5254                                (instance.name, msg))
5255
5256     self.feedback_fn("* migrating instance to %s" % target_node)
5257     time.sleep(10)
5258     result = self.rpc.call_instance_migrate(source_node, instance,
5259                                             self.nodes_ip[target_node],
5260                                             self.live)
5261     msg = result.fail_msg
5262     if msg:
5263       logging.error("Instance migration failed, trying to revert"
5264                     " disk status: %s", msg)
5265       self.feedback_fn("Migration failed, aborting")
5266       self._AbortMigration()
5267       self._RevertDiskStatus()
5268       raise errors.OpExecError("Could not migrate instance %s: %s" %
5269                                (instance.name, msg))
5270     time.sleep(10)
5271
5272     instance.primary_node = target_node
5273     # distribute new instance config to the other nodes
5274     self.cfg.Update(instance, self.feedback_fn)
5275
5276     result = self.rpc.call_finalize_migration(target_node,
5277                                               instance,
5278                                               migration_info,
5279                                               True)
5280     msg = result.fail_msg
5281     if msg:
5282       logging.error("Instance migration succeeded, but finalization failed:"
5283                     " %s", msg)
5284       raise errors.OpExecError("Could not finalize instance migration: %s" %
5285                                msg)
5286
5287     self._EnsureSecondary(source_node)
5288     self._WaitUntilSync()
5289     self._GoStandalone()
5290     self._GoReconnect(False)
5291     self._WaitUntilSync()
5292
5293     self.feedback_fn("* done")
5294
5295   def Exec(self, feedback_fn):
5296     """Perform the migration.
5297
5298     """
5299     feedback_fn("Migrating instance %s" % self.instance.name)
5300
5301     self.feedback_fn = feedback_fn
5302
5303     self.source_node = self.instance.primary_node
5304     self.target_node = self.instance.secondary_nodes[0]
5305     self.all_nodes = [self.source_node, self.target_node]
5306     self.nodes_ip = {
5307       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5308       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5309       }
5310
5311     if self.cleanup:
5312       return self._ExecCleanup()
5313     else:
5314       return self._ExecMigration()
5315
5316
5317 def _CreateBlockDev(lu, node, instance, device, force_create,
5318                     info, force_open):
5319   """Create a tree of block devices on a given node.
5320
5321   If this device type has to be created on secondaries, create it and
5322   all its children.
5323
5324   If not, just recurse to children keeping the same 'force' value.
5325
5326   @param lu: the lu on whose behalf we execute
5327   @param node: the node on which to create the device
5328   @type instance: L{objects.Instance}
5329   @param instance: the instance which owns the device
5330   @type device: L{objects.Disk}
5331   @param device: the device to create
5332   @type force_create: boolean
5333   @param force_create: whether to force creation of this device; this
5334       will be change to True whenever we find a device which has
5335       CreateOnSecondary() attribute
5336   @param info: the extra 'metadata' we should attach to the device
5337       (this will be represented as a LVM tag)
5338   @type force_open: boolean
5339   @param force_open: this parameter will be passes to the
5340       L{backend.BlockdevCreate} function where it specifies
5341       whether we run on primary or not, and it affects both
5342       the child assembly and the device own Open() execution
5343
5344   """
5345   if device.CreateOnSecondary():
5346     force_create = True
5347
5348   if device.children:
5349     for child in device.children:
5350       _CreateBlockDev(lu, node, instance, child, force_create,
5351                       info, force_open)
5352
5353   if not force_create:
5354     return
5355
5356   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5357
5358
5359 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5360   """Create a single block device on a given node.
5361
5362   This will not recurse over children of the device, so they must be
5363   created in advance.
5364
5365   @param lu: the lu on whose behalf we execute
5366   @param node: the node on which to create the device
5367   @type instance: L{objects.Instance}
5368   @param instance: the instance which owns the device
5369   @type device: L{objects.Disk}
5370   @param device: the device to create
5371   @param info: the extra 'metadata' we should attach to the device
5372       (this will be represented as a LVM tag)
5373   @type force_open: boolean
5374   @param force_open: this parameter will be passes to the
5375       L{backend.BlockdevCreate} function where it specifies
5376       whether we run on primary or not, and it affects both
5377       the child assembly and the device own Open() execution
5378
5379   """
5380   lu.cfg.SetDiskID(device, node)
5381   result = lu.rpc.call_blockdev_create(node, device, device.size,
5382                                        instance.name, force_open, info)
5383   result.Raise("Can't create block device %s on"
5384                " node %s for instance %s" % (device, node, instance.name))
5385   if device.physical_id is None:
5386     device.physical_id = result.payload
5387
5388
5389 def _GenerateUniqueNames(lu, exts):
5390   """Generate a suitable LV name.
5391
5392   This will generate a logical volume name for the given instance.
5393
5394   """
5395   results = []
5396   for val in exts:
5397     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5398     results.append("%s%s" % (new_id, val))
5399   return results
5400
5401
5402 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5403                          p_minor, s_minor):
5404   """Generate a drbd8 device complete with its children.
5405
5406   """
5407   port = lu.cfg.AllocatePort()
5408   vgname = lu.cfg.GetVGName()
5409   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5410   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5411                           logical_id=(vgname, names[0]))
5412   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5413                           logical_id=(vgname, names[1]))
5414   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5415                           logical_id=(primary, secondary, port,
5416                                       p_minor, s_minor,
5417                                       shared_secret),
5418                           children=[dev_data, dev_meta],
5419                           iv_name=iv_name)
5420   return drbd_dev
5421
5422
5423 def _GenerateDiskTemplate(lu, template_name,
5424                           instance_name, primary_node,
5425                           secondary_nodes, disk_info,
5426                           file_storage_dir, file_driver,
5427                           base_index):
5428   """Generate the entire disk layout for a given template type.
5429
5430   """
5431   #TODO: compute space requirements
5432
5433   vgname = lu.cfg.GetVGName()
5434   disk_count = len(disk_info)
5435   disks = []
5436   if template_name == constants.DT_DISKLESS:
5437     pass
5438   elif template_name == constants.DT_PLAIN:
5439     if len(secondary_nodes) != 0:
5440       raise errors.ProgrammerError("Wrong template configuration")
5441
5442     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5443                                       for i in range(disk_count)])
5444     for idx, disk in enumerate(disk_info):
5445       disk_index = idx + base_index
5446       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5447                               logical_id=(vgname, names[idx]),
5448                               iv_name="disk/%d" % disk_index,
5449                               mode=disk["mode"])
5450       disks.append(disk_dev)
5451   elif template_name == constants.DT_DRBD8:
5452     if len(secondary_nodes) != 1:
5453       raise errors.ProgrammerError("Wrong template configuration")
5454     remote_node = secondary_nodes[0]
5455     minors = lu.cfg.AllocateDRBDMinor(
5456       [primary_node, remote_node] * len(disk_info), instance_name)
5457
5458     names = []
5459     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5460                                                for i in range(disk_count)]):
5461       names.append(lv_prefix + "_data")
5462       names.append(lv_prefix + "_meta")
5463     for idx, disk in enumerate(disk_info):
5464       disk_index = idx + base_index
5465       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5466                                       disk["size"], names[idx*2:idx*2+2],
5467                                       "disk/%d" % disk_index,
5468                                       minors[idx*2], minors[idx*2+1])
5469       disk_dev.mode = disk["mode"]
5470       disks.append(disk_dev)
5471   elif template_name == constants.DT_FILE:
5472     if len(secondary_nodes) != 0:
5473       raise errors.ProgrammerError("Wrong template configuration")
5474
5475     for idx, disk in enumerate(disk_info):
5476       disk_index = idx + base_index
5477       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5478                               iv_name="disk/%d" % disk_index,
5479                               logical_id=(file_driver,
5480                                           "%s/disk%d" % (file_storage_dir,
5481                                                          disk_index)),
5482                               mode=disk["mode"])
5483       disks.append(disk_dev)
5484   else:
5485     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5486   return disks
5487
5488
5489 def _GetInstanceInfoText(instance):
5490   """Compute that text that should be added to the disk's metadata.
5491
5492   """
5493   return "originstname+%s" % instance.name
5494
5495
5496 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5497   """Create all disks for an instance.
5498
5499   This abstracts away some work from AddInstance.
5500
5501   @type lu: L{LogicalUnit}
5502   @param lu: the logical unit on whose behalf we execute
5503   @type instance: L{objects.Instance}
5504   @param instance: the instance whose disks we should create
5505   @type to_skip: list
5506   @param to_skip: list of indices to skip
5507   @type target_node: string
5508   @param target_node: if passed, overrides the target node for creation
5509   @rtype: boolean
5510   @return: the success of the creation
5511
5512   """
5513   info = _GetInstanceInfoText(instance)
5514   if target_node is None:
5515     pnode = instance.primary_node
5516     all_nodes = instance.all_nodes
5517   else:
5518     pnode = target_node
5519     all_nodes = [pnode]
5520
5521   if instance.disk_template == constants.DT_FILE:
5522     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5523     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5524
5525     result.Raise("Failed to create directory '%s' on"
5526                  " node %s" % (file_storage_dir, pnode))
5527
5528   # Note: this needs to be kept in sync with adding of disks in
5529   # LUSetInstanceParams
5530   for idx, device in enumerate(instance.disks):
5531     if to_skip and idx in to_skip:
5532       continue
5533     logging.info("Creating volume %s for instance %s",
5534                  device.iv_name, instance.name)
5535     #HARDCODE
5536     for node in all_nodes:
5537       f_create = node == pnode
5538       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5539
5540
5541 def _RemoveDisks(lu, instance, target_node=None):
5542   """Remove all disks for an instance.
5543
5544   This abstracts away some work from `AddInstance()` and
5545   `RemoveInstance()`. Note that in case some of the devices couldn't
5546   be removed, the removal will continue with the other ones (compare
5547   with `_CreateDisks()`).
5548
5549   @type lu: L{LogicalUnit}
5550   @param lu: the logical unit on whose behalf we execute
5551   @type instance: L{objects.Instance}
5552   @param instance: the instance whose disks we should remove
5553   @type target_node: string
5554   @param target_node: used to override the node on which to remove the disks
5555   @rtype: boolean
5556   @return: the success of the removal
5557
5558   """
5559   logging.info("Removing block devices for instance %s", instance.name)
5560
5561   all_result = True
5562   for device in instance.disks:
5563     if target_node:
5564       edata = [(target_node, device)]
5565     else:
5566       edata = device.ComputeNodeTree(instance.primary_node)
5567     for node, disk in edata:
5568       lu.cfg.SetDiskID(disk, node)
5569       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5570       if msg:
5571         lu.LogWarning("Could not remove block device %s on node %s,"
5572                       " continuing anyway: %s", device.iv_name, node, msg)
5573         all_result = False
5574
5575   if instance.disk_template == constants.DT_FILE:
5576     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5577     if target_node:
5578       tgt = target_node
5579     else:
5580       tgt = instance.primary_node
5581     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5582     if result.fail_msg:
5583       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5584                     file_storage_dir, instance.primary_node, result.fail_msg)
5585       all_result = False
5586
5587   return all_result
5588
5589
5590 def _ComputeDiskSize(disk_template, disks):
5591   """Compute disk size requirements in the volume group
5592
5593   """
5594   # Required free disk space as a function of disk and swap space
5595   req_size_dict = {
5596     constants.DT_DISKLESS: None,
5597     constants.DT_PLAIN: sum(d["size"] for d in disks),
5598     # 128 MB are added for drbd metadata for each disk
5599     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5600     constants.DT_FILE: None,
5601   }
5602
5603   if disk_template not in req_size_dict:
5604     raise errors.ProgrammerError("Disk template '%s' size requirement"
5605                                  " is unknown" %  disk_template)
5606
5607   return req_size_dict[disk_template]
5608
5609
5610 def _CheckHVParams(lu, nodenames, hvname, hvparams):
5611   """Hypervisor parameter validation.
5612
5613   This function abstract the hypervisor parameter validation to be
5614   used in both instance create and instance modify.
5615
5616   @type lu: L{LogicalUnit}
5617   @param lu: the logical unit for which we check
5618   @type nodenames: list
5619   @param nodenames: the list of nodes on which we should check
5620   @type hvname: string
5621   @param hvname: the name of the hypervisor we should use
5622   @type hvparams: dict
5623   @param hvparams: the parameters which we need to check
5624   @raise errors.OpPrereqError: if the parameters are not valid
5625
5626   """
5627   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5628                                                   hvname,
5629                                                   hvparams)
5630   for node in nodenames:
5631     info = hvinfo[node]
5632     if info.offline:
5633       continue
5634     info.Raise("Hypervisor parameter validation failed on node %s" % node)
5635
5636
5637 class LUCreateInstance(LogicalUnit):
5638   """Create an instance.
5639
5640   """
5641   HPATH = "instance-add"
5642   HTYPE = constants.HTYPE_INSTANCE
5643   _OP_REQP = ["instance_name", "disks", "disk_template",
5644               "mode", "start",
5645               "wait_for_sync", "ip_check", "nics",
5646               "hvparams", "beparams"]
5647   REQ_BGL = False
5648
5649   def CheckArguments(self):
5650     """Check arguments.
5651
5652     """
5653     # do not require name_check to ease forward/backward compatibility
5654     # for tools
5655     if not hasattr(self.op, "name_check"):
5656       self.op.name_check = True
5657     if self.op.ip_check and not self.op.name_check:
5658       # TODO: make the ip check more flexible and not depend on the name check
5659       raise errors.OpPrereqError("Cannot do ip checks without a name check",
5660                                  errors.ECODE_INVAL)
5661
5662   def _ExpandNode(self, node):
5663     """Expands and checks one node name.
5664
5665     """
5666     node_full = self.cfg.ExpandNodeName(node)
5667     if node_full is None:
5668       raise errors.OpPrereqError("Unknown node %s" % node, errors.ECODE_NOENT)
5669     return node_full
5670
5671   def ExpandNames(self):
5672     """ExpandNames for CreateInstance.
5673
5674     Figure out the right locks for instance creation.
5675
5676     """
5677     self.needed_locks = {}
5678
5679     # set optional parameters to none if they don't exist
5680     for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5681       if not hasattr(self.op, attr):
5682         setattr(self.op, attr, None)
5683
5684     # cheap checks, mostly valid constants given
5685
5686     # verify creation mode
5687     if self.op.mode not in (constants.INSTANCE_CREATE,
5688                             constants.INSTANCE_IMPORT):
5689       raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5690                                  self.op.mode, errors.ECODE_INVAL)
5691
5692     # disk template and mirror node verification
5693     if self.op.disk_template not in constants.DISK_TEMPLATES:
5694       raise errors.OpPrereqError("Invalid disk template name",
5695                                  errors.ECODE_INVAL)
5696
5697     if self.op.hypervisor is None:
5698       self.op.hypervisor = self.cfg.GetHypervisorType()
5699
5700     cluster = self.cfg.GetClusterInfo()
5701     enabled_hvs = cluster.enabled_hypervisors
5702     if self.op.hypervisor not in enabled_hvs:
5703       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5704                                  " cluster (%s)" % (self.op.hypervisor,
5705                                   ",".join(enabled_hvs)),
5706                                  errors.ECODE_STATE)
5707
5708     # check hypervisor parameter syntax (locally)
5709     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5710     filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5711                                   self.op.hvparams)
5712     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5713     hv_type.CheckParameterSyntax(filled_hvp)
5714     self.hv_full = filled_hvp
5715     # check that we don't specify global parameters on an instance
5716     _CheckGlobalHvParams(self.op.hvparams)
5717
5718     # fill and remember the beparams dict
5719     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5720     self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5721                                     self.op.beparams)
5722
5723     #### instance parameters check
5724
5725     # instance name verification
5726     if self.op.name_check:
5727       hostname1 = utils.GetHostInfo(self.op.instance_name)
5728       self.op.instance_name = instance_name = hostname1.name
5729       # used in CheckPrereq for ip ping check
5730       self.check_ip = hostname1.ip
5731     else:
5732       instance_name = self.op.instance_name
5733       self.check_ip = None
5734
5735     # this is just a preventive check, but someone might still add this
5736     # instance in the meantime, and creation will fail at lock-add time
5737     if instance_name in self.cfg.GetInstanceList():
5738       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5739                                  instance_name, errors.ECODE_EXISTS)
5740
5741     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5742
5743     # NIC buildup
5744     self.nics = []
5745     for idx, nic in enumerate(self.op.nics):
5746       nic_mode_req = nic.get("mode", None)
5747       nic_mode = nic_mode_req
5748       if nic_mode is None:
5749         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5750
5751       # in routed mode, for the first nic, the default ip is 'auto'
5752       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5753         default_ip_mode = constants.VALUE_AUTO
5754       else:
5755         default_ip_mode = constants.VALUE_NONE
5756
5757       # ip validity checks
5758       ip = nic.get("ip", default_ip_mode)
5759       if ip is None or ip.lower() == constants.VALUE_NONE:
5760         nic_ip = None
5761       elif ip.lower() == constants.VALUE_AUTO:
5762         if not self.op.name_check:
5763           raise errors.OpPrereqError("IP address set to auto but name checks"
5764                                      " have been skipped. Aborting.",
5765                                      errors.ECODE_INVAL)
5766         nic_ip = hostname1.ip
5767       else:
5768         if not utils.IsValidIP(ip):
5769           raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5770                                      " like a valid IP" % ip,
5771                                      errors.ECODE_INVAL)
5772         nic_ip = ip
5773
5774       # TODO: check the ip address for uniqueness
5775       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5776         raise errors.OpPrereqError("Routed nic mode requires an ip address",
5777                                    errors.ECODE_INVAL)
5778
5779       # MAC address verification
5780       mac = nic.get("mac", constants.VALUE_AUTO)
5781       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5782         mac = utils.NormalizeAndValidateMac(mac)
5783
5784         try:
5785           self.cfg.ReserveMAC(mac, self.proc.GetECId())
5786         except errors.ReservationError:
5787           raise errors.OpPrereqError("MAC address %s already in use"
5788                                      " in cluster" % mac,
5789                                      errors.ECODE_NOTUNIQUE)
5790
5791       # bridge verification
5792       bridge = nic.get("bridge", None)
5793       link = nic.get("link", None)
5794       if bridge and link:
5795         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5796                                    " at the same time", errors.ECODE_INVAL)
5797       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5798         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
5799                                    errors.ECODE_INVAL)
5800       elif bridge:
5801         link = bridge
5802
5803       nicparams = {}
5804       if nic_mode_req:
5805         nicparams[constants.NIC_MODE] = nic_mode_req
5806       if link:
5807         nicparams[constants.NIC_LINK] = link
5808
5809       check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
5810                                       nicparams)
5811       objects.NIC.CheckParameterSyntax(check_params)
5812       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
5813
5814     # disk checks/pre-build
5815     self.disks = []
5816     for disk in self.op.disks:
5817       mode = disk.get("mode", constants.DISK_RDWR)
5818       if mode not in constants.DISK_ACCESS_SET:
5819         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
5820                                    mode, errors.ECODE_INVAL)
5821       size = disk.get("size", None)
5822       if size is None:
5823         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
5824       try:
5825         size = int(size)
5826       except (TypeError, ValueError):
5827         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
5828                                    errors.ECODE_INVAL)
5829       self.disks.append({"size": size, "mode": mode})
5830
5831     # file storage checks
5832     if (self.op.file_driver and
5833         not self.op.file_driver in constants.FILE_DRIVER):
5834       raise errors.OpPrereqError("Invalid file driver name '%s'" %
5835                                  self.op.file_driver, errors.ECODE_INVAL)
5836
5837     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
5838       raise errors.OpPrereqError("File storage directory path not absolute",
5839                                  errors.ECODE_INVAL)
5840
5841     ### Node/iallocator related checks
5842     if [self.op.iallocator, self.op.pnode].count(None) != 1:
5843       raise errors.OpPrereqError("One and only one of iallocator and primary"
5844                                  " node must be given",
5845                                  errors.ECODE_INVAL)
5846
5847     if self.op.iallocator:
5848       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5849     else:
5850       self.op.pnode = self._ExpandNode(self.op.pnode)
5851       nodelist = [self.op.pnode]
5852       if self.op.snode is not None:
5853         self.op.snode = self._ExpandNode(self.op.snode)
5854         nodelist.append(self.op.snode)
5855       self.needed_locks[locking.LEVEL_NODE] = nodelist
5856
5857     # in case of import lock the source node too
5858     if self.op.mode == constants.INSTANCE_IMPORT:
5859       src_node = getattr(self.op, "src_node", None)
5860       src_path = getattr(self.op, "src_path", None)
5861
5862       if src_path is None:
5863         self.op.src_path = src_path = self.op.instance_name
5864
5865       if src_node is None:
5866         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5867         self.op.src_node = None
5868         if os.path.isabs(src_path):
5869           raise errors.OpPrereqError("Importing an instance from an absolute"
5870                                      " path requires a source node option.",
5871                                      errors.ECODE_INVAL)
5872       else:
5873         self.op.src_node = src_node = self._ExpandNode(src_node)
5874         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5875           self.needed_locks[locking.LEVEL_NODE].append(src_node)
5876         if not os.path.isabs(src_path):
5877           self.op.src_path = src_path = \
5878             os.path.join(constants.EXPORT_DIR, src_path)
5879
5880       # On import force_variant must be True, because if we forced it at
5881       # initial install, our only chance when importing it back is that it
5882       # works again!
5883       self.op.force_variant = True
5884
5885     else: # INSTANCE_CREATE
5886       if getattr(self.op, "os_type", None) is None:
5887         raise errors.OpPrereqError("No guest OS specified",
5888                                    errors.ECODE_INVAL)
5889       self.op.force_variant = getattr(self.op, "force_variant", False)
5890
5891   def _RunAllocator(self):
5892     """Run the allocator based on input opcode.
5893
5894     """
5895     nics = [n.ToDict() for n in self.nics]
5896     ial = IAllocator(self.cfg, self.rpc,
5897                      mode=constants.IALLOCATOR_MODE_ALLOC,
5898                      name=self.op.instance_name,
5899                      disk_template=self.op.disk_template,
5900                      tags=[],
5901                      os=self.op.os_type,
5902                      vcpus=self.be_full[constants.BE_VCPUS],
5903                      mem_size=self.be_full[constants.BE_MEMORY],
5904                      disks=self.disks,
5905                      nics=nics,
5906                      hypervisor=self.op.hypervisor,
5907                      )
5908
5909     ial.Run(self.op.iallocator)
5910
5911     if not ial.success:
5912       raise errors.OpPrereqError("Can't compute nodes using"
5913                                  " iallocator '%s': %s" %
5914                                  (self.op.iallocator, ial.info),
5915                                  errors.ECODE_NORES)
5916     if len(ial.nodes) != ial.required_nodes:
5917       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5918                                  " of nodes (%s), required %s" %
5919                                  (self.op.iallocator, len(ial.nodes),
5920                                   ial.required_nodes), errors.ECODE_FAULT)
5921     self.op.pnode = ial.nodes[0]
5922     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
5923                  self.op.instance_name, self.op.iallocator,
5924                  utils.CommaJoin(ial.nodes))
5925     if ial.required_nodes == 2:
5926       self.op.snode = ial.nodes[1]
5927
5928   def BuildHooksEnv(self):
5929     """Build hooks env.
5930
5931     This runs on master, primary and secondary nodes of the instance.
5932
5933     """
5934     env = {
5935       "ADD_MODE": self.op.mode,
5936       }
5937     if self.op.mode == constants.INSTANCE_IMPORT:
5938       env["SRC_NODE"] = self.op.src_node
5939       env["SRC_PATH"] = self.op.src_path
5940       env["SRC_IMAGES"] = self.src_images
5941
5942     env.update(_BuildInstanceHookEnv(
5943       name=self.op.instance_name,
5944       primary_node=self.op.pnode,
5945       secondary_nodes=self.secondaries,
5946       status=self.op.start,
5947       os_type=self.op.os_type,
5948       memory=self.be_full[constants.BE_MEMORY],
5949       vcpus=self.be_full[constants.BE_VCPUS],
5950       nics=_NICListToTuple(self, self.nics),
5951       disk_template=self.op.disk_template,
5952       disks=[(d["size"], d["mode"]) for d in self.disks],
5953       bep=self.be_full,
5954       hvp=self.hv_full,
5955       hypervisor_name=self.op.hypervisor,
5956     ))
5957
5958     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
5959           self.secondaries)
5960     return env, nl, nl
5961
5962
5963   def CheckPrereq(self):
5964     """Check prerequisites.
5965
5966     """
5967     if (not self.cfg.GetVGName() and
5968         self.op.disk_template not in constants.DTS_NOT_LVM):
5969       raise errors.OpPrereqError("Cluster does not support lvm-based"
5970                                  " instances", errors.ECODE_STATE)
5971
5972     if self.op.mode == constants.INSTANCE_IMPORT:
5973       src_node = self.op.src_node
5974       src_path = self.op.src_path
5975
5976       if src_node is None:
5977         locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
5978         exp_list = self.rpc.call_export_list(locked_nodes)
5979         found = False
5980         for node in exp_list:
5981           if exp_list[node].fail_msg:
5982             continue
5983           if src_path in exp_list[node].payload:
5984             found = True
5985             self.op.src_node = src_node = node
5986             self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
5987                                                        src_path)
5988             break
5989         if not found:
5990           raise errors.OpPrereqError("No export found for relative path %s" %
5991                                       src_path, errors.ECODE_INVAL)
5992
5993       _CheckNodeOnline(self, src_node)
5994       result = self.rpc.call_export_info(src_node, src_path)
5995       result.Raise("No export or invalid export found in dir %s" % src_path)
5996
5997       export_info = objects.SerializableConfigParser.Loads(str(result.payload))
5998       if not export_info.has_section(constants.INISECT_EXP):
5999         raise errors.ProgrammerError("Corrupted export config",
6000                                      errors.ECODE_ENVIRON)
6001
6002       ei_version = export_info.get(constants.INISECT_EXP, 'version')
6003       if (int(ei_version) != constants.EXPORT_VERSION):
6004         raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6005                                    (ei_version, constants.EXPORT_VERSION),
6006                                    errors.ECODE_ENVIRON)
6007
6008       # Check that the new instance doesn't have less disks than the export
6009       instance_disks = len(self.disks)
6010       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6011       if instance_disks < export_disks:
6012         raise errors.OpPrereqError("Not enough disks to import."
6013                                    " (instance: %d, export: %d)" %
6014                                    (instance_disks, export_disks),
6015                                    errors.ECODE_INVAL)
6016
6017       self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
6018       disk_images = []
6019       for idx in range(export_disks):
6020         option = 'disk%d_dump' % idx
6021         if export_info.has_option(constants.INISECT_INS, option):
6022           # FIXME: are the old os-es, disk sizes, etc. useful?
6023           export_name = export_info.get(constants.INISECT_INS, option)
6024           image = os.path.join(src_path, export_name)
6025           disk_images.append(image)
6026         else:
6027           disk_images.append(False)
6028
6029       self.src_images = disk_images
6030
6031       old_name = export_info.get(constants.INISECT_INS, 'name')
6032       # FIXME: int() here could throw a ValueError on broken exports
6033       exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
6034       if self.op.instance_name == old_name:
6035         for idx, nic in enumerate(self.nics):
6036           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6037             nic_mac_ini = 'nic%d_mac' % idx
6038             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6039
6040     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6041
6042     # ip ping checks (we use the same ip that was resolved in ExpandNames)
6043     if self.op.ip_check:
6044       if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6045         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6046                                    (self.check_ip, self.op.instance_name),
6047                                    errors.ECODE_NOTUNIQUE)
6048
6049     #### mac address generation
6050     # By generating here the mac address both the allocator and the hooks get
6051     # the real final mac address rather than the 'auto' or 'generate' value.
6052     # There is a race condition between the generation and the instance object
6053     # creation, which means that we know the mac is valid now, but we're not
6054     # sure it will be when we actually add the instance. If things go bad
6055     # adding the instance will abort because of a duplicate mac, and the
6056     # creation job will fail.
6057     for nic in self.nics:
6058       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6059         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6060
6061     #### allocator run
6062
6063     if self.op.iallocator is not None:
6064       self._RunAllocator()
6065
6066     #### node related checks
6067
6068     # check primary node
6069     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6070     assert self.pnode is not None, \
6071       "Cannot retrieve locked node %s" % self.op.pnode
6072     if pnode.offline:
6073       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6074                                  pnode.name, errors.ECODE_STATE)
6075     if pnode.drained:
6076       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6077                                  pnode.name, errors.ECODE_STATE)
6078
6079     self.secondaries = []
6080
6081     # mirror node verification
6082     if self.op.disk_template in constants.DTS_NET_MIRROR:
6083       if self.op.snode is None:
6084         raise errors.OpPrereqError("The networked disk templates need"
6085                                    " a mirror node", errors.ECODE_INVAL)
6086       if self.op.snode == pnode.name:
6087         raise errors.OpPrereqError("The secondary node cannot be the"
6088                                    " primary node.", errors.ECODE_INVAL)
6089       _CheckNodeOnline(self, self.op.snode)
6090       _CheckNodeNotDrained(self, self.op.snode)
6091       self.secondaries.append(self.op.snode)
6092
6093     nodenames = [pnode.name] + self.secondaries
6094
6095     req_size = _ComputeDiskSize(self.op.disk_template,
6096                                 self.disks)
6097
6098     # Check lv size requirements
6099     if req_size is not None:
6100       nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
6101                                          self.op.hypervisor)
6102       for node in nodenames:
6103         info = nodeinfo[node]
6104         info.Raise("Cannot get current information from node %s" % node)
6105         info = info.payload
6106         vg_free = info.get('vg_free', None)
6107         if not isinstance(vg_free, int):
6108           raise errors.OpPrereqError("Can't compute free disk space on"
6109                                      " node %s" % node, errors.ECODE_ENVIRON)
6110         if req_size > vg_free:
6111           raise errors.OpPrereqError("Not enough disk space on target node %s."
6112                                      " %d MB available, %d MB required" %
6113                                      (node, vg_free, req_size),
6114                                      errors.ECODE_NORES)
6115
6116     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6117
6118     # os verification
6119     result = self.rpc.call_os_get(pnode.name, self.op.os_type)
6120     result.Raise("OS '%s' not in supported os list for primary node %s" %
6121                  (self.op.os_type, pnode.name),
6122                  prereq=True, ecode=errors.ECODE_INVAL)
6123     if not self.op.force_variant:
6124       _CheckOSVariant(result.payload, self.op.os_type)
6125
6126     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6127
6128     # memory check on primary node
6129     if self.op.start:
6130       _CheckNodeFreeMemory(self, self.pnode.name,
6131                            "creating instance %s" % self.op.instance_name,
6132                            self.be_full[constants.BE_MEMORY],
6133                            self.op.hypervisor)
6134
6135     self.dry_run_result = list(nodenames)
6136
6137   def Exec(self, feedback_fn):
6138     """Create and add the instance to the cluster.
6139
6140     """
6141     instance = self.op.instance_name
6142     pnode_name = self.pnode.name
6143
6144     ht_kind = self.op.hypervisor
6145     if ht_kind in constants.HTS_REQ_PORT:
6146       network_port = self.cfg.AllocatePort()
6147     else:
6148       network_port = None
6149
6150     ##if self.op.vnc_bind_address is None:
6151     ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
6152
6153     # this is needed because os.path.join does not accept None arguments
6154     if self.op.file_storage_dir is None:
6155       string_file_storage_dir = ""
6156     else:
6157       string_file_storage_dir = self.op.file_storage_dir
6158
6159     # build the full file storage dir path
6160     file_storage_dir = os.path.normpath(os.path.join(
6161                                         self.cfg.GetFileStorageDir(),
6162                                         string_file_storage_dir, instance))
6163
6164
6165     disks = _GenerateDiskTemplate(self,
6166                                   self.op.disk_template,
6167                                   instance, pnode_name,
6168                                   self.secondaries,
6169                                   self.disks,
6170                                   file_storage_dir,
6171                                   self.op.file_driver,
6172                                   0)
6173
6174     iobj = objects.Instance(name=instance, os=self.op.os_type,
6175                             primary_node=pnode_name,
6176                             nics=self.nics, disks=disks,
6177                             disk_template=self.op.disk_template,
6178                             admin_up=False,
6179                             network_port=network_port,
6180                             beparams=self.op.beparams,
6181                             hvparams=self.op.hvparams,
6182                             hypervisor=self.op.hypervisor,
6183                             )
6184
6185     feedback_fn("* creating instance disks...")
6186     try:
6187       _CreateDisks(self, iobj)
6188     except errors.OpExecError:
6189       self.LogWarning("Device creation failed, reverting...")
6190       try:
6191         _RemoveDisks(self, iobj)
6192       finally:
6193         self.cfg.ReleaseDRBDMinors(instance)
6194         raise
6195
6196     feedback_fn("adding instance %s to cluster config" % instance)
6197
6198     self.cfg.AddInstance(iobj, self.proc.GetECId())
6199
6200     # Declare that we don't want to remove the instance lock anymore, as we've
6201     # added the instance to the config
6202     del self.remove_locks[locking.LEVEL_INSTANCE]
6203     # Unlock all the nodes
6204     if self.op.mode == constants.INSTANCE_IMPORT:
6205       nodes_keep = [self.op.src_node]
6206       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6207                        if node != self.op.src_node]
6208       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6209       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6210     else:
6211       self.context.glm.release(locking.LEVEL_NODE)
6212       del self.acquired_locks[locking.LEVEL_NODE]
6213
6214     if self.op.wait_for_sync:
6215       disk_abort = not _WaitForSync(self, iobj)
6216     elif iobj.disk_template in constants.DTS_NET_MIRROR:
6217       # make sure the disks are not degraded (still sync-ing is ok)
6218       time.sleep(15)
6219       feedback_fn("* checking mirrors status")
6220       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6221     else:
6222       disk_abort = False
6223
6224     if disk_abort:
6225       _RemoveDisks(self, iobj)
6226       self.cfg.RemoveInstance(iobj.name)
6227       # Make sure the instance lock gets removed
6228       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6229       raise errors.OpExecError("There are some degraded disks for"
6230                                " this instance")
6231
6232     feedback_fn("creating os for instance %s on node %s" %
6233                 (instance, pnode_name))
6234
6235     if iobj.disk_template != constants.DT_DISKLESS:
6236       if self.op.mode == constants.INSTANCE_CREATE:
6237         feedback_fn("* running the instance OS create scripts...")
6238         # FIXME: pass debug option from opcode to backend
6239         result = self.rpc.call_instance_os_add(pnode_name, iobj, False, 0)
6240         result.Raise("Could not add os for instance %s"
6241                      " on node %s" % (instance, pnode_name))
6242
6243       elif self.op.mode == constants.INSTANCE_IMPORT:
6244         feedback_fn("* running the instance OS import scripts...")
6245         src_node = self.op.src_node
6246         src_images = self.src_images
6247         cluster_name = self.cfg.GetClusterName()
6248         # FIXME: pass debug option from opcode to backend
6249         import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6250                                                          src_node, src_images,
6251                                                          cluster_name, 0)
6252         msg = import_result.fail_msg
6253         if msg:
6254           self.LogWarning("Error while importing the disk images for instance"
6255                           " %s on node %s: %s" % (instance, pnode_name, msg))
6256       else:
6257         # also checked in the prereq part
6258         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6259                                      % self.op.mode)
6260
6261     if self.op.start:
6262       iobj.admin_up = True
6263       self.cfg.Update(iobj, feedback_fn)
6264       logging.info("Starting instance %s on node %s", instance, pnode_name)
6265       feedback_fn("* starting instance...")
6266       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6267       result.Raise("Could not start instance")
6268
6269     return list(iobj.all_nodes)
6270
6271
6272 class LUConnectConsole(NoHooksLU):
6273   """Connect to an instance's console.
6274
6275   This is somewhat special in that it returns the command line that
6276   you need to run on the master node in order to connect to the
6277   console.
6278
6279   """
6280   _OP_REQP = ["instance_name"]
6281   REQ_BGL = False
6282
6283   def ExpandNames(self):
6284     self._ExpandAndLockInstance()
6285
6286   def CheckPrereq(self):
6287     """Check prerequisites.
6288
6289     This checks that the instance is in the cluster.
6290
6291     """
6292     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6293     assert self.instance is not None, \
6294       "Cannot retrieve locked instance %s" % self.op.instance_name
6295     _CheckNodeOnline(self, self.instance.primary_node)
6296
6297   def Exec(self, feedback_fn):
6298     """Connect to the console of an instance
6299
6300     """
6301     instance = self.instance
6302     node = instance.primary_node
6303
6304     node_insts = self.rpc.call_instance_list([node],
6305                                              [instance.hypervisor])[node]
6306     node_insts.Raise("Can't get node information from %s" % node)
6307
6308     if instance.name not in node_insts.payload:
6309       raise errors.OpExecError("Instance %s is not running." % instance.name)
6310
6311     logging.debug("Connecting to console of %s on %s", instance.name, node)
6312
6313     hyper = hypervisor.GetHypervisor(instance.hypervisor)
6314     cluster = self.cfg.GetClusterInfo()
6315     # beparams and hvparams are passed separately, to avoid editing the
6316     # instance and then saving the defaults in the instance itself.
6317     hvparams = cluster.FillHV(instance)
6318     beparams = cluster.FillBE(instance)
6319     console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6320
6321     # build ssh cmdline
6322     return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
6323
6324
6325 class LUReplaceDisks(LogicalUnit):
6326   """Replace the disks of an instance.
6327
6328   """
6329   HPATH = "mirrors-replace"
6330   HTYPE = constants.HTYPE_INSTANCE
6331   _OP_REQP = ["instance_name", "mode", "disks"]
6332   REQ_BGL = False
6333
6334   def CheckArguments(self):
6335     if not hasattr(self.op, "remote_node"):
6336       self.op.remote_node = None
6337     if not hasattr(self.op, "iallocator"):
6338       self.op.iallocator = None
6339     if not hasattr(self.op, "early_release"):
6340       self.op.early_release = False
6341
6342     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6343                                   self.op.iallocator)
6344
6345   def ExpandNames(self):
6346     self._ExpandAndLockInstance()
6347
6348     if self.op.iallocator is not None:
6349       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6350
6351     elif self.op.remote_node is not None:
6352       remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
6353       if remote_node is None:
6354         raise errors.OpPrereqError("Node '%s' not known" %
6355                                    self.op.remote_node, errors.ECODE_NOENT)
6356
6357       self.op.remote_node = remote_node
6358
6359       # Warning: do not remove the locking of the new secondary here
6360       # unless DRBD8.AddChildren is changed to work in parallel;
6361       # currently it doesn't since parallel invocations of
6362       # FindUnusedMinor will conflict
6363       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6364       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6365
6366     else:
6367       self.needed_locks[locking.LEVEL_NODE] = []
6368       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6369
6370     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6371                                    self.op.iallocator, self.op.remote_node,
6372                                    self.op.disks, False, self.op.early_release)
6373
6374     self.tasklets = [self.replacer]
6375
6376   def DeclareLocks(self, level):
6377     # If we're not already locking all nodes in the set we have to declare the
6378     # instance's primary/secondary nodes.
6379     if (level == locking.LEVEL_NODE and
6380         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6381       self._LockInstancesNodes()
6382
6383   def BuildHooksEnv(self):
6384     """Build hooks env.
6385
6386     This runs on the master, the primary and all the secondaries.
6387
6388     """
6389     instance = self.replacer.instance
6390     env = {
6391       "MODE": self.op.mode,
6392       "NEW_SECONDARY": self.op.remote_node,
6393       "OLD_SECONDARY": instance.secondary_nodes[0],
6394       }
6395     env.update(_BuildInstanceHookEnvByObject(self, instance))
6396     nl = [
6397       self.cfg.GetMasterNode(),
6398       instance.primary_node,
6399       ]
6400     if self.op.remote_node is not None:
6401       nl.append(self.op.remote_node)
6402     return env, nl, nl
6403
6404
6405 class LUEvacuateNode(LogicalUnit):
6406   """Relocate the secondary instances from a node.
6407
6408   """
6409   HPATH = "node-evacuate"
6410   HTYPE = constants.HTYPE_NODE
6411   _OP_REQP = ["node_name"]
6412   REQ_BGL = False
6413
6414   def CheckArguments(self):
6415     if not hasattr(self.op, "remote_node"):
6416       self.op.remote_node = None
6417     if not hasattr(self.op, "iallocator"):
6418       self.op.iallocator = None
6419     if not hasattr(self.op, "early_release"):
6420       self.op.early_release = False
6421
6422     TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6423                                   self.op.remote_node,
6424                                   self.op.iallocator)
6425
6426   def ExpandNames(self):
6427     self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
6428     if self.op.node_name is None:
6429       raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name,
6430                                  errors.ECODE_NOENT)
6431
6432     self.needed_locks = {}
6433
6434     # Declare node locks
6435     if self.op.iallocator is not None:
6436       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6437
6438     elif self.op.remote_node is not None:
6439       remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
6440       if remote_node is None:
6441         raise errors.OpPrereqError("Node '%s' not known" %
6442                                    self.op.remote_node, errors.ECODE_NOENT)
6443
6444       self.op.remote_node = remote_node
6445
6446       # Warning: do not remove the locking of the new secondary here
6447       # unless DRBD8.AddChildren is changed to work in parallel;
6448       # currently it doesn't since parallel invocations of
6449       # FindUnusedMinor will conflict
6450       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6451       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6452
6453     else:
6454       raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
6455
6456     # Create tasklets for replacing disks for all secondary instances on this
6457     # node
6458     names = []
6459     tasklets = []
6460
6461     for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6462       logging.debug("Replacing disks for instance %s", inst.name)
6463       names.append(inst.name)
6464
6465       replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6466                                 self.op.iallocator, self.op.remote_node, [],
6467                                 True, self.op.early_release)
6468       tasklets.append(replacer)
6469
6470     self.tasklets = tasklets
6471     self.instance_names = names
6472
6473     # Declare instance locks
6474     self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
6475
6476   def DeclareLocks(self, level):
6477     # If we're not already locking all nodes in the set we have to declare the
6478     # instance's primary/secondary nodes.
6479     if (level == locking.LEVEL_NODE and
6480         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6481       self._LockInstancesNodes()
6482
6483   def BuildHooksEnv(self):
6484     """Build hooks env.
6485
6486     This runs on the master, the primary and all the secondaries.
6487
6488     """
6489     env = {
6490       "NODE_NAME": self.op.node_name,
6491       }
6492
6493     nl = [self.cfg.GetMasterNode()]
6494
6495     if self.op.remote_node is not None:
6496       env["NEW_SECONDARY"] = self.op.remote_node
6497       nl.append(self.op.remote_node)
6498
6499     return (env, nl, nl)
6500
6501
6502 class TLReplaceDisks(Tasklet):
6503   """Replaces disks for an instance.
6504
6505   Note: Locking is not within the scope of this class.
6506
6507   """
6508   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6509                disks, delay_iallocator, early_release):
6510     """Initializes this class.
6511
6512     """
6513     Tasklet.__init__(self, lu)
6514
6515     # Parameters
6516     self.instance_name = instance_name
6517     self.mode = mode
6518     self.iallocator_name = iallocator_name
6519     self.remote_node = remote_node
6520     self.disks = disks
6521     self.delay_iallocator = delay_iallocator
6522     self.early_release = early_release
6523
6524     # Runtime data
6525     self.instance = None
6526     self.new_node = None
6527     self.target_node = None
6528     self.other_node = None
6529     self.remote_node_info = None
6530     self.node_secondary_ip = None
6531
6532   @staticmethod
6533   def CheckArguments(mode, remote_node, iallocator):
6534     """Helper function for users of this class.
6535
6536     """
6537     # check for valid parameter combination
6538     if mode == constants.REPLACE_DISK_CHG:
6539       if remote_node is None and iallocator is None:
6540         raise errors.OpPrereqError("When changing the secondary either an"
6541                                    " iallocator script must be used or the"
6542                                    " new node given", errors.ECODE_INVAL)
6543
6544       if remote_node is not None and iallocator is not None:
6545         raise errors.OpPrereqError("Give either the iallocator or the new"
6546                                    " secondary, not both", errors.ECODE_INVAL)
6547
6548     elif remote_node is not None or iallocator is not None:
6549       # Not replacing the secondary
6550       raise errors.OpPrereqError("The iallocator and new node options can"
6551                                  " only be used when changing the"
6552                                  " secondary node", errors.ECODE_INVAL)
6553
6554   @staticmethod
6555   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6556     """Compute a new secondary node using an IAllocator.
6557
6558     """
6559     ial = IAllocator(lu.cfg, lu.rpc,
6560                      mode=constants.IALLOCATOR_MODE_RELOC,
6561                      name=instance_name,
6562                      relocate_from=relocate_from)
6563
6564     ial.Run(iallocator_name)
6565
6566     if not ial.success:
6567       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6568                                  " %s" % (iallocator_name, ial.info),
6569                                  errors.ECODE_NORES)
6570
6571     if len(ial.nodes) != ial.required_nodes:
6572       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6573                                  " of nodes (%s), required %s" %
6574                                  (iallocator_name,
6575                                   len(ial.nodes), ial.required_nodes),
6576                                  errors.ECODE_FAULT)
6577
6578     remote_node_name = ial.nodes[0]
6579
6580     lu.LogInfo("Selected new secondary for instance '%s': %s",
6581                instance_name, remote_node_name)
6582
6583     return remote_node_name
6584
6585   def _FindFaultyDisks(self, node_name):
6586     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
6587                                     node_name, True)
6588
6589   def CheckPrereq(self):
6590     """Check prerequisites.
6591
6592     This checks that the instance is in the cluster.
6593
6594     """
6595     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
6596     assert instance is not None, \
6597       "Cannot retrieve locked instance %s" % self.instance_name
6598
6599     if instance.disk_template != constants.DT_DRBD8:
6600       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
6601                                  " instances", errors.ECODE_INVAL)
6602
6603     if len(instance.secondary_nodes) != 1:
6604       raise errors.OpPrereqError("The instance has a strange layout,"
6605                                  " expected one secondary but found %d" %
6606                                  len(instance.secondary_nodes),
6607                                  errors.ECODE_FAULT)
6608
6609     if not self.delay_iallocator:
6610       self._CheckPrereq2()
6611
6612   def _CheckPrereq2(self):
6613     """Check prerequisites, second part.
6614
6615     This function should always be part of CheckPrereq. It was separated and is
6616     now called from Exec because during node evacuation iallocator was only
6617     called with an unmodified cluster model, not taking planned changes into
6618     account.
6619
6620     """
6621     instance = self.instance
6622     secondary_node = instance.secondary_nodes[0]
6623
6624     if self.iallocator_name is None:
6625       remote_node = self.remote_node
6626     else:
6627       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
6628                                        instance.name, instance.secondary_nodes)
6629
6630     if remote_node is not None:
6631       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
6632       assert self.remote_node_info is not None, \
6633         "Cannot retrieve locked node %s" % remote_node
6634     else:
6635       self.remote_node_info = None
6636
6637     if remote_node == self.instance.primary_node:
6638       raise errors.OpPrereqError("The specified node is the primary node of"
6639                                  " the instance.", errors.ECODE_INVAL)
6640
6641     if remote_node == secondary_node:
6642       raise errors.OpPrereqError("The specified node is already the"
6643                                  " secondary node of the instance.",
6644                                  errors.ECODE_INVAL)
6645
6646     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
6647                                     constants.REPLACE_DISK_CHG):
6648       raise errors.OpPrereqError("Cannot specify disks to be replaced",
6649                                  errors.ECODE_INVAL)
6650
6651     if self.mode == constants.REPLACE_DISK_AUTO:
6652       faulty_primary = self._FindFaultyDisks(instance.primary_node)
6653       faulty_secondary = self._FindFaultyDisks(secondary_node)
6654
6655       if faulty_primary and faulty_secondary:
6656         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
6657                                    " one node and can not be repaired"
6658                                    " automatically" % self.instance_name,
6659                                    errors.ECODE_STATE)
6660
6661       if faulty_primary:
6662         self.disks = faulty_primary
6663         self.target_node = instance.primary_node
6664         self.other_node = secondary_node
6665         check_nodes = [self.target_node, self.other_node]
6666       elif faulty_secondary:
6667         self.disks = faulty_secondary
6668         self.target_node = secondary_node
6669         self.other_node = instance.primary_node
6670         check_nodes = [self.target_node, self.other_node]
6671       else:
6672         self.disks = []
6673         check_nodes = []
6674
6675     else:
6676       # Non-automatic modes
6677       if self.mode == constants.REPLACE_DISK_PRI:
6678         self.target_node = instance.primary_node
6679         self.other_node = secondary_node
6680         check_nodes = [self.target_node, self.other_node]
6681
6682       elif self.mode == constants.REPLACE_DISK_SEC:
6683         self.target_node = secondary_node
6684         self.other_node = instance.primary_node
6685         check_nodes = [self.target_node, self.other_node]
6686
6687       elif self.mode == constants.REPLACE_DISK_CHG:
6688         self.new_node = remote_node
6689         self.other_node = instance.primary_node
6690         self.target_node = secondary_node
6691         check_nodes = [self.new_node, self.other_node]
6692
6693         _CheckNodeNotDrained(self.lu, remote_node)
6694
6695       else:
6696         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
6697                                      self.mode)
6698
6699       # If not specified all disks should be replaced
6700       if not self.disks:
6701         self.disks = range(len(self.instance.disks))
6702
6703     for node in check_nodes:
6704       _CheckNodeOnline(self.lu, node)
6705
6706     # Check whether disks are valid
6707     for disk_idx in self.disks:
6708       instance.FindDisk(disk_idx)
6709
6710     # Get secondary node IP addresses
6711     node_2nd_ip = {}
6712
6713     for node_name in [self.target_node, self.other_node, self.new_node]:
6714       if node_name is not None:
6715         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
6716
6717     self.node_secondary_ip = node_2nd_ip
6718
6719   def Exec(self, feedback_fn):
6720     """Execute disk replacement.
6721
6722     This dispatches the disk replacement to the appropriate handler.
6723
6724     """
6725     if self.delay_iallocator:
6726       self._CheckPrereq2()
6727
6728     if not self.disks:
6729       feedback_fn("No disks need replacement")
6730       return
6731
6732     feedback_fn("Replacing disk(s) %s for %s" %
6733                 (utils.CommaJoin(self.disks), self.instance.name))
6734
6735     activate_disks = (not self.instance.admin_up)
6736
6737     # Activate the instance disks if we're replacing them on a down instance
6738     if activate_disks:
6739       _StartInstanceDisks(self.lu, self.instance, True)
6740
6741     try:
6742       # Should we replace the secondary node?
6743       if self.new_node is not None:
6744         fn = self._ExecDrbd8Secondary
6745       else:
6746         fn = self._ExecDrbd8DiskOnly
6747
6748       return fn(feedback_fn)
6749
6750     finally:
6751       # Deactivate the instance disks if we're replacing them on a
6752       # down instance
6753       if activate_disks:
6754         _SafeShutdownInstanceDisks(self.lu, self.instance)
6755
6756   def _CheckVolumeGroup(self, nodes):
6757     self.lu.LogInfo("Checking volume groups")
6758
6759     vgname = self.cfg.GetVGName()
6760
6761     # Make sure volume group exists on all involved nodes
6762     results = self.rpc.call_vg_list(nodes)
6763     if not results:
6764       raise errors.OpExecError("Can't list volume groups on the nodes")
6765
6766     for node in nodes:
6767       res = results[node]
6768       res.Raise("Error checking node %s" % node)
6769       if vgname not in res.payload:
6770         raise errors.OpExecError("Volume group '%s' not found on node %s" %
6771                                  (vgname, node))
6772
6773   def _CheckDisksExistence(self, nodes):
6774     # Check disk existence
6775     for idx, dev in enumerate(self.instance.disks):
6776       if idx not in self.disks:
6777         continue
6778
6779       for node in nodes:
6780         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
6781         self.cfg.SetDiskID(dev, node)
6782
6783         result = self.rpc.call_blockdev_find(node, dev)
6784
6785         msg = result.fail_msg
6786         if msg or not result.payload:
6787           if not msg:
6788             msg = "disk not found"
6789           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
6790                                    (idx, node, msg))
6791
6792   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
6793     for idx, dev in enumerate(self.instance.disks):
6794       if idx not in self.disks:
6795         continue
6796
6797       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
6798                       (idx, node_name))
6799
6800       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
6801                                    ldisk=ldisk):
6802         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
6803                                  " replace disks for instance %s" %
6804                                  (node_name, self.instance.name))
6805
6806   def _CreateNewStorage(self, node_name):
6807     vgname = self.cfg.GetVGName()
6808     iv_names = {}
6809
6810     for idx, dev in enumerate(self.instance.disks):
6811       if idx not in self.disks:
6812         continue
6813
6814       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
6815
6816       self.cfg.SetDiskID(dev, node_name)
6817
6818       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
6819       names = _GenerateUniqueNames(self.lu, lv_names)
6820
6821       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
6822                              logical_id=(vgname, names[0]))
6823       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6824                              logical_id=(vgname, names[1]))
6825
6826       new_lvs = [lv_data, lv_meta]
6827       old_lvs = dev.children
6828       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
6829
6830       # we pass force_create=True to force the LVM creation
6831       for new_lv in new_lvs:
6832         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
6833                         _GetInstanceInfoText(self.instance), False)
6834
6835     return iv_names
6836
6837   def _CheckDevices(self, node_name, iv_names):
6838     for name, (dev, _, _) in iv_names.iteritems():
6839       self.cfg.SetDiskID(dev, node_name)
6840
6841       result = self.rpc.call_blockdev_find(node_name, dev)
6842
6843       msg = result.fail_msg
6844       if msg or not result.payload:
6845         if not msg:
6846           msg = "disk not found"
6847         raise errors.OpExecError("Can't find DRBD device %s: %s" %
6848                                  (name, msg))
6849
6850       if result.payload.is_degraded:
6851         raise errors.OpExecError("DRBD device %s is degraded!" % name)
6852
6853   def _RemoveOldStorage(self, node_name, iv_names):
6854     for name, (_, old_lvs, _) in iv_names.iteritems():
6855       self.lu.LogInfo("Remove logical volumes for %s" % name)
6856
6857       for lv in old_lvs:
6858         self.cfg.SetDiskID(lv, node_name)
6859
6860         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
6861         if msg:
6862           self.lu.LogWarning("Can't remove old LV: %s" % msg,
6863                              hint="remove unused LVs manually")
6864
6865   def _ReleaseNodeLock(self, node_name):
6866     """Releases the lock for a given node."""
6867     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
6868
6869   def _ExecDrbd8DiskOnly(self, feedback_fn):
6870     """Replace a disk on the primary or secondary for DRBD 8.
6871
6872     The algorithm for replace is quite complicated:
6873
6874       1. for each disk to be replaced:
6875
6876         1. create new LVs on the target node with unique names
6877         1. detach old LVs from the drbd device
6878         1. rename old LVs to name_replaced.<time_t>
6879         1. rename new LVs to old LVs
6880         1. attach the new LVs (with the old names now) to the drbd device
6881
6882       1. wait for sync across all devices
6883
6884       1. for each modified disk:
6885
6886         1. remove old LVs (which have the name name_replaces.<time_t>)
6887
6888     Failures are not very well handled.
6889
6890     """
6891     steps_total = 6
6892
6893     # Step: check device activation
6894     self.lu.LogStep(1, steps_total, "Check device existence")
6895     self._CheckDisksExistence([self.other_node, self.target_node])
6896     self._CheckVolumeGroup([self.target_node, self.other_node])
6897
6898     # Step: check other node consistency
6899     self.lu.LogStep(2, steps_total, "Check peer consistency")
6900     self._CheckDisksConsistency(self.other_node,
6901                                 self.other_node == self.instance.primary_node,
6902                                 False)
6903
6904     # Step: create new storage
6905     self.lu.LogStep(3, steps_total, "Allocate new storage")
6906     iv_names = self._CreateNewStorage(self.target_node)
6907
6908     # Step: for each lv, detach+rename*2+attach
6909     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6910     for dev, old_lvs, new_lvs in iv_names.itervalues():
6911       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
6912
6913       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
6914                                                      old_lvs)
6915       result.Raise("Can't detach drbd from local storage on node"
6916                    " %s for device %s" % (self.target_node, dev.iv_name))
6917       #dev.children = []
6918       #cfg.Update(instance)
6919
6920       # ok, we created the new LVs, so now we know we have the needed
6921       # storage; as such, we proceed on the target node to rename
6922       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
6923       # using the assumption that logical_id == physical_id (which in
6924       # turn is the unique_id on that node)
6925
6926       # FIXME(iustin): use a better name for the replaced LVs
6927       temp_suffix = int(time.time())
6928       ren_fn = lambda d, suff: (d.physical_id[0],
6929                                 d.physical_id[1] + "_replaced-%s" % suff)
6930
6931       # Build the rename list based on what LVs exist on the node
6932       rename_old_to_new = []
6933       for to_ren in old_lvs:
6934         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
6935         if not result.fail_msg and result.payload:
6936           # device exists
6937           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
6938
6939       self.lu.LogInfo("Renaming the old LVs on the target node")
6940       result = self.rpc.call_blockdev_rename(self.target_node,
6941                                              rename_old_to_new)
6942       result.Raise("Can't rename old LVs on node %s" % self.target_node)
6943
6944       # Now we rename the new LVs to the old LVs
6945       self.lu.LogInfo("Renaming the new LVs on the target node")
6946       rename_new_to_old = [(new, old.physical_id)
6947                            for old, new in zip(old_lvs, new_lvs)]
6948       result = self.rpc.call_blockdev_rename(self.target_node,
6949                                              rename_new_to_old)
6950       result.Raise("Can't rename new LVs on node %s" % self.target_node)
6951
6952       for old, new in zip(old_lvs, new_lvs):
6953         new.logical_id = old.logical_id
6954         self.cfg.SetDiskID(new, self.target_node)
6955
6956       for disk in old_lvs:
6957         disk.logical_id = ren_fn(disk, temp_suffix)
6958         self.cfg.SetDiskID(disk, self.target_node)
6959
6960       # Now that the new lvs have the old name, we can add them to the device
6961       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
6962       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
6963                                                   new_lvs)
6964       msg = result.fail_msg
6965       if msg:
6966         for new_lv in new_lvs:
6967           msg2 = self.rpc.call_blockdev_remove(self.target_node,
6968                                                new_lv).fail_msg
6969           if msg2:
6970             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
6971                                hint=("cleanup manually the unused logical"
6972                                      "volumes"))
6973         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
6974
6975       dev.children = new_lvs
6976
6977       self.cfg.Update(self.instance, feedback_fn)
6978
6979     cstep = 5
6980     if self.early_release:
6981       self.lu.LogStep(cstep, steps_total, "Removing old storage")
6982       cstep += 1
6983       self._RemoveOldStorage(self.target_node, iv_names)
6984       # only release the lock if we're doing secondary replace, since
6985       # we use the primary node later
6986       if self.target_node != self.instance.primary_node:
6987         self._ReleaseNodeLock(self.target_node)
6988
6989     # Wait for sync
6990     # This can fail as the old devices are degraded and _WaitForSync
6991     # does a combined result over all disks, so we don't check its return value
6992     self.lu.LogStep(cstep, steps_total, "Sync devices")
6993     cstep += 1
6994     _WaitForSync(self.lu, self.instance)
6995
6996     # Check all devices manually
6997     self._CheckDevices(self.instance.primary_node, iv_names)
6998
6999     # Step: remove old storage
7000     if not self.early_release:
7001       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7002       cstep += 1
7003       self._RemoveOldStorage(self.target_node, iv_names)
7004
7005   def _ExecDrbd8Secondary(self, feedback_fn):
7006     """Replace the secondary node for DRBD 8.
7007
7008     The algorithm for replace is quite complicated:
7009       - for all disks of the instance:
7010         - create new LVs on the new node with same names
7011         - shutdown the drbd device on the old secondary
7012         - disconnect the drbd network on the primary
7013         - create the drbd device on the new secondary
7014         - network attach the drbd on the primary, using an artifice:
7015           the drbd code for Attach() will connect to the network if it
7016           finds a device which is connected to the good local disks but
7017           not network enabled
7018       - wait for sync across all devices
7019       - remove all disks from the old secondary
7020
7021     Failures are not very well handled.
7022
7023     """
7024     steps_total = 6
7025
7026     # Step: check device activation
7027     self.lu.LogStep(1, steps_total, "Check device existence")
7028     self._CheckDisksExistence([self.instance.primary_node])
7029     self._CheckVolumeGroup([self.instance.primary_node])
7030
7031     # Step: check other node consistency
7032     self.lu.LogStep(2, steps_total, "Check peer consistency")
7033     self._CheckDisksConsistency(self.instance.primary_node, True, True)
7034
7035     # Step: create new storage
7036     self.lu.LogStep(3, steps_total, "Allocate new storage")
7037     for idx, dev in enumerate(self.instance.disks):
7038       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7039                       (self.new_node, idx))
7040       # we pass force_create=True to force LVM creation
7041       for new_lv in dev.children:
7042         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7043                         _GetInstanceInfoText(self.instance), False)
7044
7045     # Step 4: dbrd minors and drbd setups changes
7046     # after this, we must manually remove the drbd minors on both the
7047     # error and the success paths
7048     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7049     minors = self.cfg.AllocateDRBDMinor([self.new_node
7050                                          for dev in self.instance.disks],
7051                                         self.instance.name)
7052     logging.debug("Allocated minors %r", minors)
7053
7054     iv_names = {}
7055     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7056       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7057                       (self.new_node, idx))
7058       # create new devices on new_node; note that we create two IDs:
7059       # one without port, so the drbd will be activated without
7060       # networking information on the new node at this stage, and one
7061       # with network, for the latter activation in step 4
7062       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7063       if self.instance.primary_node == o_node1:
7064         p_minor = o_minor1
7065       else:
7066         assert self.instance.primary_node == o_node2, "Three-node instance?"
7067         p_minor = o_minor2
7068
7069       new_alone_id = (self.instance.primary_node, self.new_node, None,
7070                       p_minor, new_minor, o_secret)
7071       new_net_id = (self.instance.primary_node, self.new_node, o_port,
7072                     p_minor, new_minor, o_secret)
7073
7074       iv_names[idx] = (dev, dev.children, new_net_id)
7075       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7076                     new_net_id)
7077       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7078                               logical_id=new_alone_id,
7079                               children=dev.children,
7080                               size=dev.size)
7081       try:
7082         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7083                               _GetInstanceInfoText(self.instance), False)
7084       except errors.GenericError:
7085         self.cfg.ReleaseDRBDMinors(self.instance.name)
7086         raise
7087
7088     # We have new devices, shutdown the drbd on the old secondary
7089     for idx, dev in enumerate(self.instance.disks):
7090       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7091       self.cfg.SetDiskID(dev, self.target_node)
7092       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7093       if msg:
7094         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7095                            "node: %s" % (idx, msg),
7096                            hint=("Please cleanup this device manually as"
7097                                  " soon as possible"))
7098
7099     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7100     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7101                                                self.node_secondary_ip,
7102                                                self.instance.disks)\
7103                                               [self.instance.primary_node]
7104
7105     msg = result.fail_msg
7106     if msg:
7107       # detaches didn't succeed (unlikely)
7108       self.cfg.ReleaseDRBDMinors(self.instance.name)
7109       raise errors.OpExecError("Can't detach the disks from the network on"
7110                                " old node: %s" % (msg,))
7111
7112     # if we managed to detach at least one, we update all the disks of
7113     # the instance to point to the new secondary
7114     self.lu.LogInfo("Updating instance configuration")
7115     for dev, _, new_logical_id in iv_names.itervalues():
7116       dev.logical_id = new_logical_id
7117       self.cfg.SetDiskID(dev, self.instance.primary_node)
7118
7119     self.cfg.Update(self.instance, feedback_fn)
7120
7121     # and now perform the drbd attach
7122     self.lu.LogInfo("Attaching primary drbds to new secondary"
7123                     " (standalone => connected)")
7124     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7125                                             self.new_node],
7126                                            self.node_secondary_ip,
7127                                            self.instance.disks,
7128                                            self.instance.name,
7129                                            False)
7130     for to_node, to_result in result.items():
7131       msg = to_result.fail_msg
7132       if msg:
7133         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7134                            to_node, msg,
7135                            hint=("please do a gnt-instance info to see the"
7136                                  " status of disks"))
7137     cstep = 5
7138     if self.early_release:
7139       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7140       cstep += 1
7141       self._RemoveOldStorage(self.target_node, iv_names)
7142       self._ReleaseNodeLock([self.target_node, self.new_node])
7143
7144     # Wait for sync
7145     # This can fail as the old devices are degraded and _WaitForSync
7146     # does a combined result over all disks, so we don't check its return value
7147     self.lu.LogStep(cstep, steps_total, "Sync devices")
7148     cstep += 1
7149     _WaitForSync(self.lu, self.instance)
7150
7151     # Check all devices manually
7152     self._CheckDevices(self.instance.primary_node, iv_names)
7153
7154     # Step: remove old storage
7155     if not self.early_release:
7156       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7157       self._RemoveOldStorage(self.target_node, iv_names)
7158
7159
7160 class LURepairNodeStorage(NoHooksLU):
7161   """Repairs the volume group on a node.
7162
7163   """
7164   _OP_REQP = ["node_name"]
7165   REQ_BGL = False
7166
7167   def CheckArguments(self):
7168     node_name = self.cfg.ExpandNodeName(self.op.node_name)
7169     if node_name is None:
7170       raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
7171                                  errors.ECODE_NOENT)
7172
7173     self.op.node_name = node_name
7174
7175   def ExpandNames(self):
7176     self.needed_locks = {
7177       locking.LEVEL_NODE: [self.op.node_name],
7178       }
7179
7180   def _CheckFaultyDisks(self, instance, node_name):
7181     """Ensure faulty disks abort the opcode or at least warn."""
7182     try:
7183       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7184                                   node_name, True):
7185         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7186                                    " node '%s'" % (instance.name, node_name),
7187                                    errors.ECODE_STATE)
7188     except errors.OpPrereqError, err:
7189       if self.op.ignore_consistency:
7190         self.proc.LogWarning(str(err.args[0]))
7191       else:
7192         raise
7193
7194   def CheckPrereq(self):
7195     """Check prerequisites.
7196
7197     """
7198     storage_type = self.op.storage_type
7199
7200     if (constants.SO_FIX_CONSISTENCY not in
7201         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7202       raise errors.OpPrereqError("Storage units of type '%s' can not be"
7203                                  " repaired" % storage_type,
7204                                  errors.ECODE_INVAL)
7205
7206     # Check whether any instance on this node has faulty disks
7207     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7208       if not inst.admin_up:
7209         continue
7210       check_nodes = set(inst.all_nodes)
7211       check_nodes.discard(self.op.node_name)
7212       for inst_node_name in check_nodes:
7213         self._CheckFaultyDisks(inst, inst_node_name)
7214
7215   def Exec(self, feedback_fn):
7216     feedback_fn("Repairing storage unit '%s' on %s ..." %
7217                 (self.op.name, self.op.node_name))
7218
7219     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7220     result = self.rpc.call_storage_execute(self.op.node_name,
7221                                            self.op.storage_type, st_args,
7222                                            self.op.name,
7223                                            constants.SO_FIX_CONSISTENCY)
7224     result.Raise("Failed to repair storage unit '%s' on %s" %
7225                  (self.op.name, self.op.node_name))
7226
7227
7228 class LUGrowDisk(LogicalUnit):
7229   """Grow a disk of an instance.
7230
7231   """
7232   HPATH = "disk-grow"
7233   HTYPE = constants.HTYPE_INSTANCE
7234   _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7235   REQ_BGL = False
7236
7237   def ExpandNames(self):
7238     self._ExpandAndLockInstance()
7239     self.needed_locks[locking.LEVEL_NODE] = []
7240     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7241
7242   def DeclareLocks(self, level):
7243     if level == locking.LEVEL_NODE:
7244       self._LockInstancesNodes()
7245
7246   def BuildHooksEnv(self):
7247     """Build hooks env.
7248
7249     This runs on the master, the primary and all the secondaries.
7250
7251     """
7252     env = {
7253       "DISK": self.op.disk,
7254       "AMOUNT": self.op.amount,
7255       }
7256     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7257     nl = [
7258       self.cfg.GetMasterNode(),
7259       self.instance.primary_node,
7260       ]
7261     return env, nl, nl
7262
7263   def CheckPrereq(self):
7264     """Check prerequisites.
7265
7266     This checks that the instance is in the cluster.
7267
7268     """
7269     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7270     assert instance is not None, \
7271       "Cannot retrieve locked instance %s" % self.op.instance_name
7272     nodenames = list(instance.all_nodes)
7273     for node in nodenames:
7274       _CheckNodeOnline(self, node)
7275
7276
7277     self.instance = instance
7278
7279     if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
7280       raise errors.OpPrereqError("Instance's disk layout does not support"
7281                                  " growing.", errors.ECODE_INVAL)
7282
7283     self.disk = instance.FindDisk(self.op.disk)
7284
7285     nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
7286                                        instance.hypervisor)
7287     for node in nodenames:
7288       info = nodeinfo[node]
7289       info.Raise("Cannot get current information from node %s" % node)
7290       vg_free = info.payload.get('vg_free', None)
7291       if not isinstance(vg_free, int):
7292         raise errors.OpPrereqError("Can't compute free disk space on"
7293                                    " node %s" % node, errors.ECODE_ENVIRON)
7294       if self.op.amount > vg_free:
7295         raise errors.OpPrereqError("Not enough disk space on target node %s:"
7296                                    " %d MiB available, %d MiB required" %
7297                                    (node, vg_free, self.op.amount),
7298                                    errors.ECODE_NORES)
7299
7300   def Exec(self, feedback_fn):
7301     """Execute disk grow.
7302
7303     """
7304     instance = self.instance
7305     disk = self.disk
7306     for node in instance.all_nodes:
7307       self.cfg.SetDiskID(disk, node)
7308       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7309       result.Raise("Grow request failed to node %s" % node)
7310
7311       # TODO: Rewrite code to work properly
7312       # DRBD goes into sync mode for a short amount of time after executing the
7313       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7314       # calling "resize" in sync mode fails. Sleeping for a short amount of
7315       # time is a work-around.
7316       time.sleep(5)
7317
7318     disk.RecordGrow(self.op.amount)
7319     self.cfg.Update(instance, feedback_fn)
7320     if self.op.wait_for_sync:
7321       disk_abort = not _WaitForSync(self, instance)
7322       if disk_abort:
7323         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7324                              " status.\nPlease check the instance.")
7325
7326
7327 class LUQueryInstanceData(NoHooksLU):
7328   """Query runtime instance data.
7329
7330   """
7331   _OP_REQP = ["instances", "static"]
7332   REQ_BGL = False
7333
7334   def ExpandNames(self):
7335     self.needed_locks = {}
7336     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7337
7338     if not isinstance(self.op.instances, list):
7339       raise errors.OpPrereqError("Invalid argument type 'instances'",
7340                                  errors.ECODE_INVAL)
7341
7342     if self.op.instances:
7343       self.wanted_names = []
7344       for name in self.op.instances:
7345         full_name = self.cfg.ExpandInstanceName(name)
7346         if full_name is None:
7347           raise errors.OpPrereqError("Instance '%s' not known" % name,
7348                                      errors.ECODE_NOENT)
7349         self.wanted_names.append(full_name)
7350       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7351     else:
7352       self.wanted_names = None
7353       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7354
7355     self.needed_locks[locking.LEVEL_NODE] = []
7356     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7357
7358   def DeclareLocks(self, level):
7359     if level == locking.LEVEL_NODE:
7360       self._LockInstancesNodes()
7361
7362   def CheckPrereq(self):
7363     """Check prerequisites.
7364
7365     This only checks the optional instance list against the existing names.
7366
7367     """
7368     if self.wanted_names is None:
7369       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7370
7371     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7372                              in self.wanted_names]
7373     return
7374
7375   def _ComputeBlockdevStatus(self, node, instance_name, dev):
7376     """Returns the status of a block device
7377
7378     """
7379     if self.op.static or not node:
7380       return None
7381
7382     self.cfg.SetDiskID(dev, node)
7383
7384     result = self.rpc.call_blockdev_find(node, dev)
7385     if result.offline:
7386       return None
7387
7388     result.Raise("Can't compute disk status for %s" % instance_name)
7389
7390     status = result.payload
7391     if status is None:
7392       return None
7393
7394     return (status.dev_path, status.major, status.minor,
7395             status.sync_percent, status.estimated_time,
7396             status.is_degraded, status.ldisk_status)
7397
7398   def _ComputeDiskStatus(self, instance, snode, dev):
7399     """Compute block device status.
7400
7401     """
7402     if dev.dev_type in constants.LDS_DRBD:
7403       # we change the snode then (otherwise we use the one passed in)
7404       if dev.logical_id[0] == instance.primary_node:
7405         snode = dev.logical_id[1]
7406       else:
7407         snode = dev.logical_id[0]
7408
7409     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7410                                               instance.name, dev)
7411     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7412
7413     if dev.children:
7414       dev_children = [self._ComputeDiskStatus(instance, snode, child)
7415                       for child in dev.children]
7416     else:
7417       dev_children = []
7418
7419     data = {
7420       "iv_name": dev.iv_name,
7421       "dev_type": dev.dev_type,
7422       "logical_id": dev.logical_id,
7423       "physical_id": dev.physical_id,
7424       "pstatus": dev_pstatus,
7425       "sstatus": dev_sstatus,
7426       "children": dev_children,
7427       "mode": dev.mode,
7428       "size": dev.size,
7429       }
7430
7431     return data
7432
7433   def Exec(self, feedback_fn):
7434     """Gather and return data"""
7435     result = {}
7436
7437     cluster = self.cfg.GetClusterInfo()
7438
7439     for instance in self.wanted_instances:
7440       if not self.op.static:
7441         remote_info = self.rpc.call_instance_info(instance.primary_node,
7442                                                   instance.name,
7443                                                   instance.hypervisor)
7444         remote_info.Raise("Error checking node %s" % instance.primary_node)
7445         remote_info = remote_info.payload
7446         if remote_info and "state" in remote_info:
7447           remote_state = "up"
7448         else:
7449           remote_state = "down"
7450       else:
7451         remote_state = None
7452       if instance.admin_up:
7453         config_state = "up"
7454       else:
7455         config_state = "down"
7456
7457       disks = [self._ComputeDiskStatus(instance, None, device)
7458                for device in instance.disks]
7459
7460       idict = {
7461         "name": instance.name,
7462         "config_state": config_state,
7463         "run_state": remote_state,
7464         "pnode": instance.primary_node,
7465         "snodes": instance.secondary_nodes,
7466         "os": instance.os,
7467         # this happens to be the same format used for hooks
7468         "nics": _NICListToTuple(self, instance.nics),
7469         "disks": disks,
7470         "hypervisor": instance.hypervisor,
7471         "network_port": instance.network_port,
7472         "hv_instance": instance.hvparams,
7473         "hv_actual": cluster.FillHV(instance, skip_globals=True),
7474         "be_instance": instance.beparams,
7475         "be_actual": cluster.FillBE(instance),
7476         "serial_no": instance.serial_no,
7477         "mtime": instance.mtime,
7478         "ctime": instance.ctime,
7479         "uuid": instance.uuid,
7480         }
7481
7482       result[instance.name] = idict
7483
7484     return result
7485
7486
7487 class LUSetInstanceParams(LogicalUnit):
7488   """Modifies an instances's parameters.
7489
7490   """
7491   HPATH = "instance-modify"
7492   HTYPE = constants.HTYPE_INSTANCE
7493   _OP_REQP = ["instance_name"]
7494   REQ_BGL = False
7495
7496   def CheckArguments(self):
7497     if not hasattr(self.op, 'nics'):
7498       self.op.nics = []
7499     if not hasattr(self.op, 'disks'):
7500       self.op.disks = []
7501     if not hasattr(self.op, 'beparams'):
7502       self.op.beparams = {}
7503     if not hasattr(self.op, 'hvparams'):
7504       self.op.hvparams = {}
7505     self.op.force = getattr(self.op, "force", False)
7506     if not (self.op.nics or self.op.disks or
7507             self.op.hvparams or self.op.beparams):
7508       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
7509
7510     if self.op.hvparams:
7511       _CheckGlobalHvParams(self.op.hvparams)
7512
7513     # Disk validation
7514     disk_addremove = 0
7515     for disk_op, disk_dict in self.op.disks:
7516       if disk_op == constants.DDM_REMOVE:
7517         disk_addremove += 1
7518         continue
7519       elif disk_op == constants.DDM_ADD:
7520         disk_addremove += 1
7521       else:
7522         if not isinstance(disk_op, int):
7523           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
7524         if not isinstance(disk_dict, dict):
7525           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7526           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7527
7528       if disk_op == constants.DDM_ADD:
7529         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7530         if mode not in constants.DISK_ACCESS_SET:
7531           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
7532                                      errors.ECODE_INVAL)
7533         size = disk_dict.get('size', None)
7534         if size is None:
7535           raise errors.OpPrereqError("Required disk parameter size missing",
7536                                      errors.ECODE_INVAL)
7537         try:
7538           size = int(size)
7539         except (TypeError, ValueError), err:
7540           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7541                                      str(err), errors.ECODE_INVAL)
7542         disk_dict['size'] = size
7543       else:
7544         # modification of disk
7545         if 'size' in disk_dict:
7546           raise errors.OpPrereqError("Disk size change not possible, use"
7547                                      " grow-disk", errors.ECODE_INVAL)
7548
7549     if disk_addremove > 1:
7550       raise errors.OpPrereqError("Only one disk add or remove operation"
7551                                  " supported at a time", errors.ECODE_INVAL)
7552
7553     # NIC validation
7554     nic_addremove = 0
7555     for nic_op, nic_dict in self.op.nics:
7556       if nic_op == constants.DDM_REMOVE:
7557         nic_addremove += 1
7558         continue
7559       elif nic_op == constants.DDM_ADD:
7560         nic_addremove += 1
7561       else:
7562         if not isinstance(nic_op, int):
7563           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
7564         if not isinstance(nic_dict, dict):
7565           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7566           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7567
7568       # nic_dict should be a dict
7569       nic_ip = nic_dict.get('ip', None)
7570       if nic_ip is not None:
7571         if nic_ip.lower() == constants.VALUE_NONE:
7572           nic_dict['ip'] = None
7573         else:
7574           if not utils.IsValidIP(nic_ip):
7575             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
7576                                        errors.ECODE_INVAL)
7577
7578       nic_bridge = nic_dict.get('bridge', None)
7579       nic_link = nic_dict.get('link', None)
7580       if nic_bridge and nic_link:
7581         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7582                                    " at the same time", errors.ECODE_INVAL)
7583       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7584         nic_dict['bridge'] = None
7585       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7586         nic_dict['link'] = None
7587
7588       if nic_op == constants.DDM_ADD:
7589         nic_mac = nic_dict.get('mac', None)
7590         if nic_mac is None:
7591           nic_dict['mac'] = constants.VALUE_AUTO
7592
7593       if 'mac' in nic_dict:
7594         nic_mac = nic_dict['mac']
7595         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7596           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
7597
7598         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7599           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7600                                      " modifying an existing nic",
7601                                      errors.ECODE_INVAL)
7602
7603     if nic_addremove > 1:
7604       raise errors.OpPrereqError("Only one NIC add or remove operation"
7605                                  " supported at a time", errors.ECODE_INVAL)
7606
7607   def ExpandNames(self):
7608     self._ExpandAndLockInstance()
7609     self.needed_locks[locking.LEVEL_NODE] = []
7610     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7611
7612   def DeclareLocks(self, level):
7613     if level == locking.LEVEL_NODE:
7614       self._LockInstancesNodes()
7615
7616   def BuildHooksEnv(self):
7617     """Build hooks env.
7618
7619     This runs on the master, primary and secondaries.
7620
7621     """
7622     args = dict()
7623     if constants.BE_MEMORY in self.be_new:
7624       args['memory'] = self.be_new[constants.BE_MEMORY]
7625     if constants.BE_VCPUS in self.be_new:
7626       args['vcpus'] = self.be_new[constants.BE_VCPUS]
7627     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7628     # information at all.
7629     if self.op.nics:
7630       args['nics'] = []
7631       nic_override = dict(self.op.nics)
7632       c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7633       for idx, nic in enumerate(self.instance.nics):
7634         if idx in nic_override:
7635           this_nic_override = nic_override[idx]
7636         else:
7637           this_nic_override = {}
7638         if 'ip' in this_nic_override:
7639           ip = this_nic_override['ip']
7640         else:
7641           ip = nic.ip
7642         if 'mac' in this_nic_override:
7643           mac = this_nic_override['mac']
7644         else:
7645           mac = nic.mac
7646         if idx in self.nic_pnew:
7647           nicparams = self.nic_pnew[idx]
7648         else:
7649           nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7650         mode = nicparams[constants.NIC_MODE]
7651         link = nicparams[constants.NIC_LINK]
7652         args['nics'].append((ip, mac, mode, link))
7653       if constants.DDM_ADD in nic_override:
7654         ip = nic_override[constants.DDM_ADD].get('ip', None)
7655         mac = nic_override[constants.DDM_ADD]['mac']
7656         nicparams = self.nic_pnew[constants.DDM_ADD]
7657         mode = nicparams[constants.NIC_MODE]
7658         link = nicparams[constants.NIC_LINK]
7659         args['nics'].append((ip, mac, mode, link))
7660       elif constants.DDM_REMOVE in nic_override:
7661         del args['nics'][-1]
7662
7663     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7664     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7665     return env, nl, nl
7666
7667   @staticmethod
7668   def _GetUpdatedParams(old_params, update_dict,
7669                         default_values, parameter_types):
7670     """Return the new params dict for the given params.
7671
7672     @type old_params: dict
7673     @param old_params: old parameters
7674     @type update_dict: dict
7675     @param update_dict: dict containing new parameter values,
7676                         or constants.VALUE_DEFAULT to reset the
7677                         parameter to its default value
7678     @type default_values: dict
7679     @param default_values: default values for the filled parameters
7680     @type parameter_types: dict
7681     @param parameter_types: dict mapping target dict keys to types
7682                             in constants.ENFORCEABLE_TYPES
7683     @rtype: (dict, dict)
7684     @return: (new_parameters, filled_parameters)
7685
7686     """
7687     params_copy = copy.deepcopy(old_params)
7688     for key, val in update_dict.iteritems():
7689       if val == constants.VALUE_DEFAULT:
7690         try:
7691           del params_copy[key]
7692         except KeyError:
7693           pass
7694       else:
7695         params_copy[key] = val
7696     utils.ForceDictType(params_copy, parameter_types)
7697     params_filled = objects.FillDict(default_values, params_copy)
7698     return (params_copy, params_filled)
7699
7700   def CheckPrereq(self):
7701     """Check prerequisites.
7702
7703     This only checks the instance list against the existing names.
7704
7705     """
7706     self.force = self.op.force
7707
7708     # checking the new params on the primary/secondary nodes
7709
7710     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7711     cluster = self.cluster = self.cfg.GetClusterInfo()
7712     assert self.instance is not None, \
7713       "Cannot retrieve locked instance %s" % self.op.instance_name
7714     pnode = instance.primary_node
7715     nodelist = list(instance.all_nodes)
7716
7717     # hvparams processing
7718     if self.op.hvparams:
7719       i_hvdict, hv_new = self._GetUpdatedParams(
7720                              instance.hvparams, self.op.hvparams,
7721                              cluster.hvparams[instance.hypervisor],
7722                              constants.HVS_PARAMETER_TYPES)
7723       # local check
7724       hypervisor.GetHypervisor(
7725         instance.hypervisor).CheckParameterSyntax(hv_new)
7726       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
7727       self.hv_new = hv_new # the new actual values
7728       self.hv_inst = i_hvdict # the new dict (without defaults)
7729     else:
7730       self.hv_new = self.hv_inst = {}
7731
7732     # beparams processing
7733     if self.op.beparams:
7734       i_bedict, be_new = self._GetUpdatedParams(
7735                              instance.beparams, self.op.beparams,
7736                              cluster.beparams[constants.PP_DEFAULT],
7737                              constants.BES_PARAMETER_TYPES)
7738       self.be_new = be_new # the new actual values
7739       self.be_inst = i_bedict # the new dict (without defaults)
7740     else:
7741       self.be_new = self.be_inst = {}
7742
7743     self.warn = []
7744
7745     if constants.BE_MEMORY in self.op.beparams and not self.force:
7746       mem_check_list = [pnode]
7747       if be_new[constants.BE_AUTO_BALANCE]:
7748         # either we changed auto_balance to yes or it was from before
7749         mem_check_list.extend(instance.secondary_nodes)
7750       instance_info = self.rpc.call_instance_info(pnode, instance.name,
7751                                                   instance.hypervisor)
7752       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
7753                                          instance.hypervisor)
7754       pninfo = nodeinfo[pnode]
7755       msg = pninfo.fail_msg
7756       if msg:
7757         # Assume the primary node is unreachable and go ahead
7758         self.warn.append("Can't get info from primary node %s: %s" %
7759                          (pnode,  msg))
7760       elif not isinstance(pninfo.payload.get('memory_free', None), int):
7761         self.warn.append("Node data from primary node %s doesn't contain"
7762                          " free memory information" % pnode)
7763       elif instance_info.fail_msg:
7764         self.warn.append("Can't get instance runtime information: %s" %
7765                         instance_info.fail_msg)
7766       else:
7767         if instance_info.payload:
7768           current_mem = int(instance_info.payload['memory'])
7769         else:
7770           # Assume instance not running
7771           # (there is a slight race condition here, but it's not very probable,
7772           # and we have no other way to check)
7773           current_mem = 0
7774         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
7775                     pninfo.payload['memory_free'])
7776         if miss_mem > 0:
7777           raise errors.OpPrereqError("This change will prevent the instance"
7778                                      " from starting, due to %d MB of memory"
7779                                      " missing on its primary node" % miss_mem,
7780                                      errors.ECODE_NORES)
7781
7782       if be_new[constants.BE_AUTO_BALANCE]:
7783         for node, nres in nodeinfo.items():
7784           if node not in instance.secondary_nodes:
7785             continue
7786           msg = nres.fail_msg
7787           if msg:
7788             self.warn.append("Can't get info from secondary node %s: %s" %
7789                              (node, msg))
7790           elif not isinstance(nres.payload.get('memory_free', None), int):
7791             self.warn.append("Secondary node %s didn't return free"
7792                              " memory information" % node)
7793           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
7794             self.warn.append("Not enough memory to failover instance to"
7795                              " secondary node %s" % node)
7796
7797     # NIC processing
7798     self.nic_pnew = {}
7799     self.nic_pinst = {}
7800     for nic_op, nic_dict in self.op.nics:
7801       if nic_op == constants.DDM_REMOVE:
7802         if not instance.nics:
7803           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
7804                                      errors.ECODE_INVAL)
7805         continue
7806       if nic_op != constants.DDM_ADD:
7807         # an existing nic
7808         if not instance.nics:
7809           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
7810                                      " no NICs" % nic_op,
7811                                      errors.ECODE_INVAL)
7812         if nic_op < 0 or nic_op >= len(instance.nics):
7813           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
7814                                      " are 0 to %d" %
7815                                      (nic_op, len(instance.nics) - 1),
7816                                      errors.ECODE_INVAL)
7817         old_nic_params = instance.nics[nic_op].nicparams
7818         old_nic_ip = instance.nics[nic_op].ip
7819       else:
7820         old_nic_params = {}
7821         old_nic_ip = None
7822
7823       update_params_dict = dict([(key, nic_dict[key])
7824                                  for key in constants.NICS_PARAMETERS
7825                                  if key in nic_dict])
7826
7827       if 'bridge' in nic_dict:
7828         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
7829
7830       new_nic_params, new_filled_nic_params = \
7831           self._GetUpdatedParams(old_nic_params, update_params_dict,
7832                                  cluster.nicparams[constants.PP_DEFAULT],
7833                                  constants.NICS_PARAMETER_TYPES)
7834       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
7835       self.nic_pinst[nic_op] = new_nic_params
7836       self.nic_pnew[nic_op] = new_filled_nic_params
7837       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
7838
7839       if new_nic_mode == constants.NIC_MODE_BRIDGED:
7840         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
7841         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
7842         if msg:
7843           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
7844           if self.force:
7845             self.warn.append(msg)
7846           else:
7847             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
7848       if new_nic_mode == constants.NIC_MODE_ROUTED:
7849         if 'ip' in nic_dict:
7850           nic_ip = nic_dict['ip']
7851         else:
7852           nic_ip = old_nic_ip
7853         if nic_ip is None:
7854           raise errors.OpPrereqError('Cannot set the nic ip to None'
7855                                      ' on a routed nic', errors.ECODE_INVAL)
7856       if 'mac' in nic_dict:
7857         nic_mac = nic_dict['mac']
7858         if nic_mac is None:
7859           raise errors.OpPrereqError('Cannot set the nic mac to None',
7860                                      errors.ECODE_INVAL)
7861         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7862           # otherwise generate the mac
7863           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
7864         else:
7865           # or validate/reserve the current one
7866           try:
7867             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
7868           except errors.ReservationError:
7869             raise errors.OpPrereqError("MAC address %s already in use"
7870                                        " in cluster" % nic_mac,
7871                                        errors.ECODE_NOTUNIQUE)
7872
7873     # DISK processing
7874     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
7875       raise errors.OpPrereqError("Disk operations not supported for"
7876                                  " diskless instances",
7877                                  errors.ECODE_INVAL)
7878     for disk_op, _ in self.op.disks:
7879       if disk_op == constants.DDM_REMOVE:
7880         if len(instance.disks) == 1:
7881           raise errors.OpPrereqError("Cannot remove the last disk of"
7882                                      " an instance",
7883                                      errors.ECODE_INVAL)
7884         ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
7885         ins_l = ins_l[pnode]
7886         msg = ins_l.fail_msg
7887         if msg:
7888           raise errors.OpPrereqError("Can't contact node %s: %s" %
7889                                      (pnode, msg), errors.ECODE_ENVIRON)
7890         if instance.name in ins_l.payload:
7891           raise errors.OpPrereqError("Instance is running, can't remove"
7892                                      " disks.", errors.ECODE_STATE)
7893
7894       if (disk_op == constants.DDM_ADD and
7895           len(instance.nics) >= constants.MAX_DISKS):
7896         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
7897                                    " add more" % constants.MAX_DISKS,
7898                                    errors.ECODE_STATE)
7899       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
7900         # an existing disk
7901         if disk_op < 0 or disk_op >= len(instance.disks):
7902           raise errors.OpPrereqError("Invalid disk index %s, valid values"
7903                                      " are 0 to %d" %
7904                                      (disk_op, len(instance.disks)),
7905                                      errors.ECODE_INVAL)
7906
7907     return
7908
7909   def Exec(self, feedback_fn):
7910     """Modifies an instance.
7911
7912     All parameters take effect only at the next restart of the instance.
7913
7914     """
7915     # Process here the warnings from CheckPrereq, as we don't have a
7916     # feedback_fn there.
7917     for warn in self.warn:
7918       feedback_fn("WARNING: %s" % warn)
7919
7920     result = []
7921     instance = self.instance
7922     # disk changes
7923     for disk_op, disk_dict in self.op.disks:
7924       if disk_op == constants.DDM_REMOVE:
7925         # remove the last disk
7926         device = instance.disks.pop()
7927         device_idx = len(instance.disks)
7928         for node, disk in device.ComputeNodeTree(instance.primary_node):
7929           self.cfg.SetDiskID(disk, node)
7930           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
7931           if msg:
7932             self.LogWarning("Could not remove disk/%d on node %s: %s,"
7933                             " continuing anyway", device_idx, node, msg)
7934         result.append(("disk/%d" % device_idx, "remove"))
7935       elif disk_op == constants.DDM_ADD:
7936         # add a new disk
7937         if instance.disk_template == constants.DT_FILE:
7938           file_driver, file_path = instance.disks[0].logical_id
7939           file_path = os.path.dirname(file_path)
7940         else:
7941           file_driver = file_path = None
7942         disk_idx_base = len(instance.disks)
7943         new_disk = _GenerateDiskTemplate(self,
7944                                          instance.disk_template,
7945                                          instance.name, instance.primary_node,
7946                                          instance.secondary_nodes,
7947                                          [disk_dict],
7948                                          file_path,
7949                                          file_driver,
7950                                          disk_idx_base)[0]
7951         instance.disks.append(new_disk)
7952         info = _GetInstanceInfoText(instance)
7953
7954         logging.info("Creating volume %s for instance %s",
7955                      new_disk.iv_name, instance.name)
7956         # Note: this needs to be kept in sync with _CreateDisks
7957         #HARDCODE
7958         for node in instance.all_nodes:
7959           f_create = node == instance.primary_node
7960           try:
7961             _CreateBlockDev(self, node, instance, new_disk,
7962                             f_create, info, f_create)
7963           except errors.OpExecError, err:
7964             self.LogWarning("Failed to create volume %s (%s) on"
7965                             " node %s: %s",
7966                             new_disk.iv_name, new_disk, node, err)
7967         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
7968                        (new_disk.size, new_disk.mode)))
7969       else:
7970         # change a given disk
7971         instance.disks[disk_op].mode = disk_dict['mode']
7972         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
7973     # NIC changes
7974     for nic_op, nic_dict in self.op.nics:
7975       if nic_op == constants.DDM_REMOVE:
7976         # remove the last nic
7977         del instance.nics[-1]
7978         result.append(("nic.%d" % len(instance.nics), "remove"))
7979       elif nic_op == constants.DDM_ADD:
7980         # mac and bridge should be set, by now
7981         mac = nic_dict['mac']
7982         ip = nic_dict.get('ip', None)
7983         nicparams = self.nic_pinst[constants.DDM_ADD]
7984         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
7985         instance.nics.append(new_nic)
7986         result.append(("nic.%d" % (len(instance.nics) - 1),
7987                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
7988                        (new_nic.mac, new_nic.ip,
7989                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
7990                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
7991                        )))
7992       else:
7993         for key in 'mac', 'ip':
7994           if key in nic_dict:
7995             setattr(instance.nics[nic_op], key, nic_dict[key])
7996         if nic_op in self.nic_pinst:
7997           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
7998         for key, val in nic_dict.iteritems():
7999           result.append(("nic.%s/%d" % (key, nic_op), val))
8000
8001     # hvparams changes
8002     if self.op.hvparams:
8003       instance.hvparams = self.hv_inst
8004       for key, val in self.op.hvparams.iteritems():
8005         result.append(("hv/%s" % key, val))
8006
8007     # beparams changes
8008     if self.op.beparams:
8009       instance.beparams = self.be_inst
8010       for key, val in self.op.beparams.iteritems():
8011         result.append(("be/%s" % key, val))
8012
8013     self.cfg.Update(instance, feedback_fn)
8014
8015     return result
8016
8017
8018 class LUQueryExports(NoHooksLU):
8019   """Query the exports list
8020
8021   """
8022   _OP_REQP = ['nodes']
8023   REQ_BGL = False
8024
8025   def ExpandNames(self):
8026     self.needed_locks = {}
8027     self.share_locks[locking.LEVEL_NODE] = 1
8028     if not self.op.nodes:
8029       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8030     else:
8031       self.needed_locks[locking.LEVEL_NODE] = \
8032         _GetWantedNodes(self, self.op.nodes)
8033
8034   def CheckPrereq(self):
8035     """Check prerequisites.
8036
8037     """
8038     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8039
8040   def Exec(self, feedback_fn):
8041     """Compute the list of all the exported system images.
8042
8043     @rtype: dict
8044     @return: a dictionary with the structure node->(export-list)
8045         where export-list is a list of the instances exported on
8046         that node.
8047
8048     """
8049     rpcresult = self.rpc.call_export_list(self.nodes)
8050     result = {}
8051     for node in rpcresult:
8052       if rpcresult[node].fail_msg:
8053         result[node] = False
8054       else:
8055         result[node] = rpcresult[node].payload
8056
8057     return result
8058
8059
8060 class LUExportInstance(LogicalUnit):
8061   """Export an instance to an image in the cluster.
8062
8063   """
8064   HPATH = "instance-export"
8065   HTYPE = constants.HTYPE_INSTANCE
8066   _OP_REQP = ["instance_name", "target_node", "shutdown"]
8067   REQ_BGL = False
8068
8069   def CheckArguments(self):
8070     """Check the arguments.
8071
8072     """
8073     self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8074                                     constants.DEFAULT_SHUTDOWN_TIMEOUT)
8075
8076   def ExpandNames(self):
8077     self._ExpandAndLockInstance()
8078     # FIXME: lock only instance primary and destination node
8079     #
8080     # Sad but true, for now we have do lock all nodes, as we don't know where
8081     # the previous export might be, and and in this LU we search for it and
8082     # remove it from its current node. In the future we could fix this by:
8083     #  - making a tasklet to search (share-lock all), then create the new one,
8084     #    then one to remove, after
8085     #  - removing the removal operation altogether
8086     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8087
8088   def DeclareLocks(self, level):
8089     """Last minute lock declaration."""
8090     # All nodes are locked anyway, so nothing to do here.
8091
8092   def BuildHooksEnv(self):
8093     """Build hooks env.
8094
8095     This will run on the master, primary node and target node.
8096
8097     """
8098     env = {
8099       "EXPORT_NODE": self.op.target_node,
8100       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8101       "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8102       }
8103     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8104     nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8105           self.op.target_node]
8106     return env, nl, nl
8107
8108   def CheckPrereq(self):
8109     """Check prerequisites.
8110
8111     This checks that the instance and node names are valid.
8112
8113     """
8114     instance_name = self.op.instance_name
8115     self.instance = self.cfg.GetInstanceInfo(instance_name)
8116     assert self.instance is not None, \
8117           "Cannot retrieve locked instance %s" % self.op.instance_name
8118     _CheckNodeOnline(self, self.instance.primary_node)
8119
8120     self.dst_node = self.cfg.GetNodeInfo(
8121       self.cfg.ExpandNodeName(self.op.target_node))
8122
8123     if self.dst_node is None:
8124       # This is wrong node name, not a non-locked node
8125       raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node,
8126                                  errors.ECODE_NOENT)
8127     _CheckNodeOnline(self, self.dst_node.name)
8128     _CheckNodeNotDrained(self, self.dst_node.name)
8129
8130     # instance disk type verification
8131     for disk in self.instance.disks:
8132       if disk.dev_type == constants.LD_FILE:
8133         raise errors.OpPrereqError("Export not supported for instances with"
8134                                    " file-based disks", errors.ECODE_INVAL)
8135
8136   def Exec(self, feedback_fn):
8137     """Export an instance to an image in the cluster.
8138
8139     """
8140     instance = self.instance
8141     dst_node = self.dst_node
8142     src_node = instance.primary_node
8143
8144     if self.op.shutdown:
8145       # shutdown the instance, but not the disks
8146       feedback_fn("Shutting down instance %s" % instance.name)
8147       result = self.rpc.call_instance_shutdown(src_node, instance,
8148                                                self.shutdown_timeout)
8149       result.Raise("Could not shutdown instance %s on"
8150                    " node %s" % (instance.name, src_node))
8151
8152     vgname = self.cfg.GetVGName()
8153
8154     snap_disks = []
8155
8156     # set the disks ID correctly since call_instance_start needs the
8157     # correct drbd minor to create the symlinks
8158     for disk in instance.disks:
8159       self.cfg.SetDiskID(disk, src_node)
8160
8161     activate_disks = (not instance.admin_up)
8162
8163     if activate_disks:
8164       # Activate the instance disks if we'exporting a stopped instance
8165       feedback_fn("Activating disks for %s" % instance.name)
8166       _StartInstanceDisks(self, instance, None)
8167
8168     try:
8169       # per-disk results
8170       dresults = []
8171       try:
8172         for idx, disk in enumerate(instance.disks):
8173           feedback_fn("Creating a snapshot of disk/%s on node %s" %
8174                       (idx, src_node))
8175
8176           # result.payload will be a snapshot of an lvm leaf of the one we
8177           # passed
8178           result = self.rpc.call_blockdev_snapshot(src_node, disk)
8179           msg = result.fail_msg
8180           if msg:
8181             self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8182                             idx, src_node, msg)
8183             snap_disks.append(False)
8184           else:
8185             disk_id = (vgname, result.payload)
8186             new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8187                                    logical_id=disk_id, physical_id=disk_id,
8188                                    iv_name=disk.iv_name)
8189             snap_disks.append(new_dev)
8190
8191       finally:
8192         if self.op.shutdown and instance.admin_up:
8193           feedback_fn("Starting instance %s" % instance.name)
8194           result = self.rpc.call_instance_start(src_node, instance, None, None)
8195           msg = result.fail_msg
8196           if msg:
8197             _ShutdownInstanceDisks(self, instance)
8198             raise errors.OpExecError("Could not start instance: %s" % msg)
8199
8200       # TODO: check for size
8201
8202       cluster_name = self.cfg.GetClusterName()
8203       for idx, dev in enumerate(snap_disks):
8204         feedback_fn("Exporting snapshot %s from %s to %s" %
8205                     (idx, src_node, dst_node.name))
8206         if dev:
8207           # FIXME: pass debug from opcode to backend
8208           result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
8209                                                  instance, cluster_name,
8210                                                  idx, 0)
8211           msg = result.fail_msg
8212           if msg:
8213             self.LogWarning("Could not export disk/%s from node %s to"
8214                             " node %s: %s", idx, src_node, dst_node.name, msg)
8215             dresults.append(False)
8216           else:
8217             dresults.append(True)
8218           msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
8219           if msg:
8220             self.LogWarning("Could not remove snapshot for disk/%d from node"
8221                             " %s: %s", idx, src_node, msg)
8222         else:
8223           dresults.append(False)
8224
8225       feedback_fn("Finalizing export on %s" % dst_node.name)
8226       result = self.rpc.call_finalize_export(dst_node.name, instance,
8227                                              snap_disks)
8228       fin_resu = True
8229       msg = result.fail_msg
8230       if msg:
8231         self.LogWarning("Could not finalize export for instance %s"
8232                         " on node %s: %s", instance.name, dst_node.name, msg)
8233         fin_resu = False
8234
8235     finally:
8236       if activate_disks:
8237         feedback_fn("Deactivating disks for %s" % instance.name)
8238         _ShutdownInstanceDisks(self, instance)
8239
8240     nodelist = self.cfg.GetNodeList()
8241     nodelist.remove(dst_node.name)
8242
8243     # on one-node clusters nodelist will be empty after the removal
8244     # if we proceed the backup would be removed because OpQueryExports
8245     # substitutes an empty list with the full cluster node list.
8246     iname = instance.name
8247     if nodelist:
8248       feedback_fn("Removing old exports for instance %s" % iname)
8249       exportlist = self.rpc.call_export_list(nodelist)
8250       for node in exportlist:
8251         if exportlist[node].fail_msg:
8252           continue
8253         if iname in exportlist[node].payload:
8254           msg = self.rpc.call_export_remove(node, iname).fail_msg
8255           if msg:
8256             self.LogWarning("Could not remove older export for instance %s"
8257                             " on node %s: %s", iname, node, msg)
8258     return fin_resu, dresults
8259
8260
8261 class LURemoveExport(NoHooksLU):
8262   """Remove exports related to the named instance.
8263
8264   """
8265   _OP_REQP = ["instance_name"]
8266   REQ_BGL = False
8267
8268   def ExpandNames(self):
8269     self.needed_locks = {}
8270     # We need all nodes to be locked in order for RemoveExport to work, but we
8271     # don't need to lock the instance itself, as nothing will happen to it (and
8272     # we can remove exports also for a removed instance)
8273     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8274
8275   def CheckPrereq(self):
8276     """Check prerequisites.
8277     """
8278     pass
8279
8280   def Exec(self, feedback_fn):
8281     """Remove any export.
8282
8283     """
8284     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
8285     # If the instance was not found we'll try with the name that was passed in.
8286     # This will only work if it was an FQDN, though.
8287     fqdn_warn = False
8288     if not instance_name:
8289       fqdn_warn = True
8290       instance_name = self.op.instance_name
8291
8292     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
8293     exportlist = self.rpc.call_export_list(locked_nodes)
8294     found = False
8295     for node in exportlist:
8296       msg = exportlist[node].fail_msg
8297       if msg:
8298         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
8299         continue
8300       if instance_name in exportlist[node].payload:
8301         found = True
8302         result = self.rpc.call_export_remove(node, instance_name)
8303         msg = result.fail_msg
8304         if msg:
8305           logging.error("Could not remove export for instance %s"
8306                         " on node %s: %s", instance_name, node, msg)
8307
8308     if fqdn_warn and not found:
8309       feedback_fn("Export not found. If trying to remove an export belonging"
8310                   " to a deleted instance please use its Fully Qualified"
8311                   " Domain Name.")
8312
8313
8314 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
8315   """Generic tags LU.
8316
8317   This is an abstract class which is the parent of all the other tags LUs.
8318
8319   """
8320
8321   def ExpandNames(self):
8322     self.needed_locks = {}
8323     if self.op.kind == constants.TAG_NODE:
8324       name = self.cfg.ExpandNodeName(self.op.name)
8325       if name is None:
8326         raise errors.OpPrereqError("Invalid node name (%s)" %
8327                                    (self.op.name,), errors.ECODE_NOENT)
8328       self.op.name = name
8329       self.needed_locks[locking.LEVEL_NODE] = name
8330     elif self.op.kind == constants.TAG_INSTANCE:
8331       name = self.cfg.ExpandInstanceName(self.op.name)
8332       if name is None:
8333         raise errors.OpPrereqError("Invalid instance name (%s)" %
8334                                    (self.op.name,), errors.ECODE_NOENT)
8335       self.op.name = name
8336       self.needed_locks[locking.LEVEL_INSTANCE] = name
8337
8338   def CheckPrereq(self):
8339     """Check prerequisites.
8340
8341     """
8342     if self.op.kind == constants.TAG_CLUSTER:
8343       self.target = self.cfg.GetClusterInfo()
8344     elif self.op.kind == constants.TAG_NODE:
8345       self.target = self.cfg.GetNodeInfo(self.op.name)
8346     elif self.op.kind == constants.TAG_INSTANCE:
8347       self.target = self.cfg.GetInstanceInfo(self.op.name)
8348     else:
8349       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
8350                                  str(self.op.kind), errors.ECODE_INVAL)
8351
8352
8353 class LUGetTags(TagsLU):
8354   """Returns the tags of a given object.
8355
8356   """
8357   _OP_REQP = ["kind", "name"]
8358   REQ_BGL = False
8359
8360   def Exec(self, feedback_fn):
8361     """Returns the tag list.
8362
8363     """
8364     return list(self.target.GetTags())
8365
8366
8367 class LUSearchTags(NoHooksLU):
8368   """Searches the tags for a given pattern.
8369
8370   """
8371   _OP_REQP = ["pattern"]
8372   REQ_BGL = False
8373
8374   def ExpandNames(self):
8375     self.needed_locks = {}
8376
8377   def CheckPrereq(self):
8378     """Check prerequisites.
8379
8380     This checks the pattern passed for validity by compiling it.
8381
8382     """
8383     try:
8384       self.re = re.compile(self.op.pattern)
8385     except re.error, err:
8386       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
8387                                  (self.op.pattern, err), errors.ECODE_INVAL)
8388
8389   def Exec(self, feedback_fn):
8390     """Returns the tag list.
8391
8392     """
8393     cfg = self.cfg
8394     tgts = [("/cluster", cfg.GetClusterInfo())]
8395     ilist = cfg.GetAllInstancesInfo().values()
8396     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
8397     nlist = cfg.GetAllNodesInfo().values()
8398     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
8399     results = []
8400     for path, target in tgts:
8401       for tag in target.GetTags():
8402         if self.re.search(tag):
8403           results.append((path, tag))
8404     return results
8405
8406
8407 class LUAddTags(TagsLU):
8408   """Sets a tag on a given object.
8409
8410   """
8411   _OP_REQP = ["kind", "name", "tags"]
8412   REQ_BGL = False
8413
8414   def CheckPrereq(self):
8415     """Check prerequisites.
8416
8417     This checks the type and length of the tag name and value.
8418
8419     """
8420     TagsLU.CheckPrereq(self)
8421     for tag in self.op.tags:
8422       objects.TaggableObject.ValidateTag(tag)
8423
8424   def Exec(self, feedback_fn):
8425     """Sets the tag.
8426
8427     """
8428     try:
8429       for tag in self.op.tags:
8430         self.target.AddTag(tag)
8431     except errors.TagError, err:
8432       raise errors.OpExecError("Error while setting tag: %s" % str(err))
8433     self.cfg.Update(self.target, feedback_fn)
8434
8435
8436 class LUDelTags(TagsLU):
8437   """Delete a list of tags from a given object.
8438
8439   """
8440   _OP_REQP = ["kind", "name", "tags"]
8441   REQ_BGL = False
8442
8443   def CheckPrereq(self):
8444     """Check prerequisites.
8445
8446     This checks that we have the given tag.
8447
8448     """
8449     TagsLU.CheckPrereq(self)
8450     for tag in self.op.tags:
8451       objects.TaggableObject.ValidateTag(tag)
8452     del_tags = frozenset(self.op.tags)
8453     cur_tags = self.target.GetTags()
8454     if not del_tags <= cur_tags:
8455       diff_tags = del_tags - cur_tags
8456       diff_names = ["'%s'" % tag for tag in diff_tags]
8457       diff_names.sort()
8458       raise errors.OpPrereqError("Tag(s) %s not found" %
8459                                  (",".join(diff_names)), errors.ECODE_NOENT)
8460
8461   def Exec(self, feedback_fn):
8462     """Remove the tag from the object.
8463
8464     """
8465     for tag in self.op.tags:
8466       self.target.RemoveTag(tag)
8467     self.cfg.Update(self.target, feedback_fn)
8468
8469
8470 class LUTestDelay(NoHooksLU):
8471   """Sleep for a specified amount of time.
8472
8473   This LU sleeps on the master and/or nodes for a specified amount of
8474   time.
8475
8476   """
8477   _OP_REQP = ["duration", "on_master", "on_nodes"]
8478   REQ_BGL = False
8479
8480   def ExpandNames(self):
8481     """Expand names and set required locks.
8482
8483     This expands the node list, if any.
8484
8485     """
8486     self.needed_locks = {}
8487     if self.op.on_nodes:
8488       # _GetWantedNodes can be used here, but is not always appropriate to use
8489       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
8490       # more information.
8491       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
8492       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
8493
8494   def CheckPrereq(self):
8495     """Check prerequisites.
8496
8497     """
8498
8499   def Exec(self, feedback_fn):
8500     """Do the actual sleep.
8501
8502     """
8503     if self.op.on_master:
8504       if not utils.TestDelay(self.op.duration):
8505         raise errors.OpExecError("Error during master delay test")
8506     if self.op.on_nodes:
8507       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
8508       for node, node_result in result.items():
8509         node_result.Raise("Failure during rpc call to node %s" % node)
8510
8511
8512 class IAllocator(object):
8513   """IAllocator framework.
8514
8515   An IAllocator instance has three sets of attributes:
8516     - cfg that is needed to query the cluster
8517     - input data (all members of the _KEYS class attribute are required)
8518     - four buffer attributes (in|out_data|text), that represent the
8519       input (to the external script) in text and data structure format,
8520       and the output from it, again in two formats
8521     - the result variables from the script (success, info, nodes) for
8522       easy usage
8523
8524   """
8525   # pylint: disable-msg=R0902
8526   # lots of instance attributes
8527   _ALLO_KEYS = [
8528     "mem_size", "disks", "disk_template",
8529     "os", "tags", "nics", "vcpus", "hypervisor",
8530     ]
8531   _RELO_KEYS = [
8532     "relocate_from",
8533     ]
8534
8535   def __init__(self, cfg, rpc, mode, name, **kwargs):
8536     self.cfg = cfg
8537     self.rpc = rpc
8538     # init buffer variables
8539     self.in_text = self.out_text = self.in_data = self.out_data = None
8540     # init all input fields so that pylint is happy
8541     self.mode = mode
8542     self.name = name
8543     self.mem_size = self.disks = self.disk_template = None
8544     self.os = self.tags = self.nics = self.vcpus = None
8545     self.hypervisor = None
8546     self.relocate_from = None
8547     # computed fields
8548     self.required_nodes = None
8549     # init result fields
8550     self.success = self.info = self.nodes = None
8551     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8552       keyset = self._ALLO_KEYS
8553     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8554       keyset = self._RELO_KEYS
8555     else:
8556       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8557                                    " IAllocator" % self.mode)
8558     for key in kwargs:
8559       if key not in keyset:
8560         raise errors.ProgrammerError("Invalid input parameter '%s' to"
8561                                      " IAllocator" % key)
8562       setattr(self, key, kwargs[key])
8563     for key in keyset:
8564       if key not in kwargs:
8565         raise errors.ProgrammerError("Missing input parameter '%s' to"
8566                                      " IAllocator" % key)
8567     self._BuildInputData()
8568
8569   def _ComputeClusterData(self):
8570     """Compute the generic allocator input data.
8571
8572     This is the data that is independent of the actual operation.
8573
8574     """
8575     cfg = self.cfg
8576     cluster_info = cfg.GetClusterInfo()
8577     # cluster data
8578     data = {
8579       "version": constants.IALLOCATOR_VERSION,
8580       "cluster_name": cfg.GetClusterName(),
8581       "cluster_tags": list(cluster_info.GetTags()),
8582       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8583       # we don't have job IDs
8584       }
8585     iinfo = cfg.GetAllInstancesInfo().values()
8586     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8587
8588     # node data
8589     node_results = {}
8590     node_list = cfg.GetNodeList()
8591
8592     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8593       hypervisor_name = self.hypervisor
8594     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8595       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
8596
8597     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
8598                                         hypervisor_name)
8599     node_iinfo = \
8600       self.rpc.call_all_instances_info(node_list,
8601                                        cluster_info.enabled_hypervisors)
8602     for nname, nresult in node_data.items():
8603       # first fill in static (config-based) values
8604       ninfo = cfg.GetNodeInfo(nname)
8605       pnr = {
8606         "tags": list(ninfo.GetTags()),
8607         "primary_ip": ninfo.primary_ip,
8608         "secondary_ip": ninfo.secondary_ip,
8609         "offline": ninfo.offline,
8610         "drained": ninfo.drained,
8611         "master_candidate": ninfo.master_candidate,
8612         }
8613
8614       if not (ninfo.offline or ninfo.drained):
8615         nresult.Raise("Can't get data for node %s" % nname)
8616         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
8617                                 nname)
8618         remote_info = nresult.payload
8619
8620         for attr in ['memory_total', 'memory_free', 'memory_dom0',
8621                      'vg_size', 'vg_free', 'cpu_total']:
8622           if attr not in remote_info:
8623             raise errors.OpExecError("Node '%s' didn't return attribute"
8624                                      " '%s'" % (nname, attr))
8625           if not isinstance(remote_info[attr], int):
8626             raise errors.OpExecError("Node '%s' returned invalid value"
8627                                      " for '%s': %s" %
8628                                      (nname, attr, remote_info[attr]))
8629         # compute memory used by primary instances
8630         i_p_mem = i_p_up_mem = 0
8631         for iinfo, beinfo in i_list:
8632           if iinfo.primary_node == nname:
8633             i_p_mem += beinfo[constants.BE_MEMORY]
8634             if iinfo.name not in node_iinfo[nname].payload:
8635               i_used_mem = 0
8636             else:
8637               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
8638             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
8639             remote_info['memory_free'] -= max(0, i_mem_diff)
8640
8641             if iinfo.admin_up:
8642               i_p_up_mem += beinfo[constants.BE_MEMORY]
8643
8644         # compute memory used by instances
8645         pnr_dyn = {
8646           "total_memory": remote_info['memory_total'],
8647           "reserved_memory": remote_info['memory_dom0'],
8648           "free_memory": remote_info['memory_free'],
8649           "total_disk": remote_info['vg_size'],
8650           "free_disk": remote_info['vg_free'],
8651           "total_cpus": remote_info['cpu_total'],
8652           "i_pri_memory": i_p_mem,
8653           "i_pri_up_memory": i_p_up_mem,
8654           }
8655         pnr.update(pnr_dyn)
8656
8657       node_results[nname] = pnr
8658     data["nodes"] = node_results
8659
8660     # instance data
8661     instance_data = {}
8662     for iinfo, beinfo in i_list:
8663       nic_data = []
8664       for nic in iinfo.nics:
8665         filled_params = objects.FillDict(
8666             cluster_info.nicparams[constants.PP_DEFAULT],
8667             nic.nicparams)
8668         nic_dict = {"mac": nic.mac,
8669                     "ip": nic.ip,
8670                     "mode": filled_params[constants.NIC_MODE],
8671                     "link": filled_params[constants.NIC_LINK],
8672                    }
8673         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
8674           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
8675         nic_data.append(nic_dict)
8676       pir = {
8677         "tags": list(iinfo.GetTags()),
8678         "admin_up": iinfo.admin_up,
8679         "vcpus": beinfo[constants.BE_VCPUS],
8680         "memory": beinfo[constants.BE_MEMORY],
8681         "os": iinfo.os,
8682         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
8683         "nics": nic_data,
8684         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
8685         "disk_template": iinfo.disk_template,
8686         "hypervisor": iinfo.hypervisor,
8687         }
8688       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
8689                                                  pir["disks"])
8690       instance_data[iinfo.name] = pir
8691
8692     data["instances"] = instance_data
8693
8694     self.in_data = data
8695
8696   def _AddNewInstance(self):
8697     """Add new instance data to allocator structure.
8698
8699     This in combination with _AllocatorGetClusterData will create the
8700     correct structure needed as input for the allocator.
8701
8702     The checks for the completeness of the opcode must have already been
8703     done.
8704
8705     """
8706     data = self.in_data
8707
8708     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
8709
8710     if self.disk_template in constants.DTS_NET_MIRROR:
8711       self.required_nodes = 2
8712     else:
8713       self.required_nodes = 1
8714     request = {
8715       "type": "allocate",
8716       "name": self.name,
8717       "disk_template": self.disk_template,
8718       "tags": self.tags,
8719       "os": self.os,
8720       "vcpus": self.vcpus,
8721       "memory": self.mem_size,
8722       "disks": self.disks,
8723       "disk_space_total": disk_space,
8724       "nics": self.nics,
8725       "required_nodes": self.required_nodes,
8726       }
8727     data["request"] = request
8728
8729   def _AddRelocateInstance(self):
8730     """Add relocate instance data to allocator structure.
8731
8732     This in combination with _IAllocatorGetClusterData will create the
8733     correct structure needed as input for the allocator.
8734
8735     The checks for the completeness of the opcode must have already been
8736     done.
8737
8738     """
8739     instance = self.cfg.GetInstanceInfo(self.name)
8740     if instance is None:
8741       raise errors.ProgrammerError("Unknown instance '%s' passed to"
8742                                    " IAllocator" % self.name)
8743
8744     if instance.disk_template not in constants.DTS_NET_MIRROR:
8745       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
8746                                  errors.ECODE_INVAL)
8747
8748     if len(instance.secondary_nodes) != 1:
8749       raise errors.OpPrereqError("Instance has not exactly one secondary node",
8750                                  errors.ECODE_STATE)
8751
8752     self.required_nodes = 1
8753     disk_sizes = [{'size': disk.size} for disk in instance.disks]
8754     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
8755
8756     request = {
8757       "type": "relocate",
8758       "name": self.name,
8759       "disk_space_total": disk_space,
8760       "required_nodes": self.required_nodes,
8761       "relocate_from": self.relocate_from,
8762       }
8763     self.in_data["request"] = request
8764
8765   def _BuildInputData(self):
8766     """Build input data structures.
8767
8768     """
8769     self._ComputeClusterData()
8770
8771     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8772       self._AddNewInstance()
8773     else:
8774       self._AddRelocateInstance()
8775
8776     self.in_text = serializer.Dump(self.in_data)
8777
8778   def Run(self, name, validate=True, call_fn=None):
8779     """Run an instance allocator and return the results.
8780
8781     """
8782     if call_fn is None:
8783       call_fn = self.rpc.call_iallocator_runner
8784
8785     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
8786     result.Raise("Failure while running the iallocator script")
8787
8788     self.out_text = result.payload
8789     if validate:
8790       self._ValidateResult()
8791
8792   def _ValidateResult(self):
8793     """Process the allocator results.
8794
8795     This will process and if successful save the result in
8796     self.out_data and the other parameters.
8797
8798     """
8799     try:
8800       rdict = serializer.Load(self.out_text)
8801     except Exception, err:
8802       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
8803
8804     if not isinstance(rdict, dict):
8805       raise errors.OpExecError("Can't parse iallocator results: not a dict")
8806
8807     for key in "success", "info", "nodes":
8808       if key not in rdict:
8809         raise errors.OpExecError("Can't parse iallocator results:"
8810                                  " missing key '%s'" % key)
8811       setattr(self, key, rdict[key])
8812
8813     if not isinstance(rdict["nodes"], list):
8814       raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
8815                                " is not a list")
8816     self.out_data = rdict
8817
8818
8819 class LUTestAllocator(NoHooksLU):
8820   """Run allocator tests.
8821
8822   This LU runs the allocator tests
8823
8824   """
8825   _OP_REQP = ["direction", "mode", "name"]
8826
8827   def CheckPrereq(self):
8828     """Check prerequisites.
8829
8830     This checks the opcode parameters depending on the director and mode test.
8831
8832     """
8833     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
8834       for attr in ["name", "mem_size", "disks", "disk_template",
8835                    "os", "tags", "nics", "vcpus"]:
8836         if not hasattr(self.op, attr):
8837           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
8838                                      attr, errors.ECODE_INVAL)
8839       iname = self.cfg.ExpandInstanceName(self.op.name)
8840       if iname is not None:
8841         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
8842                                    iname, errors.ECODE_EXISTS)
8843       if not isinstance(self.op.nics, list):
8844         raise errors.OpPrereqError("Invalid parameter 'nics'",
8845                                    errors.ECODE_INVAL)
8846       for row in self.op.nics:
8847         if (not isinstance(row, dict) or
8848             "mac" not in row or
8849             "ip" not in row or
8850             "bridge" not in row):
8851           raise errors.OpPrereqError("Invalid contents of the 'nics'"
8852                                      " parameter", errors.ECODE_INVAL)
8853       if not isinstance(self.op.disks, list):
8854         raise errors.OpPrereqError("Invalid parameter 'disks'",
8855                                    errors.ECODE_INVAL)
8856       for row in self.op.disks:
8857         if (not isinstance(row, dict) or
8858             "size" not in row or
8859             not isinstance(row["size"], int) or
8860             "mode" not in row or
8861             row["mode"] not in ['r', 'w']):
8862           raise errors.OpPrereqError("Invalid contents of the 'disks'"
8863                                      " parameter", errors.ECODE_INVAL)
8864       if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
8865         self.op.hypervisor = self.cfg.GetHypervisorType()
8866     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
8867       if not hasattr(self.op, "name"):
8868         raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
8869                                    errors.ECODE_INVAL)
8870       fname = self.cfg.ExpandInstanceName(self.op.name)
8871       if fname is None:
8872         raise errors.OpPrereqError("Instance '%s' not found for relocation" %
8873                                    self.op.name, errors.ECODE_NOENT)
8874       self.op.name = fname
8875       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
8876     else:
8877       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
8878                                  self.op.mode, errors.ECODE_INVAL)
8879
8880     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
8881       if not hasattr(self.op, "allocator") or self.op.allocator is None:
8882         raise errors.OpPrereqError("Missing allocator name",
8883                                    errors.ECODE_INVAL)
8884     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
8885       raise errors.OpPrereqError("Wrong allocator test '%s'" %
8886                                  self.op.direction, errors.ECODE_INVAL)
8887
8888   def Exec(self, feedback_fn):
8889     """Run the allocator test.
8890
8891     """
8892     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
8893       ial = IAllocator(self.cfg, self.rpc,
8894                        mode=self.op.mode,
8895                        name=self.op.name,
8896                        mem_size=self.op.mem_size,
8897                        disks=self.op.disks,
8898                        disk_template=self.op.disk_template,
8899                        os=self.op.os,
8900                        tags=self.op.tags,
8901                        nics=self.op.nics,
8902                        vcpus=self.op.vcpus,
8903                        hypervisor=self.op.hypervisor,
8904                        )
8905     else:
8906       ial = IAllocator(self.cfg, self.rpc,
8907                        mode=self.op.mode,
8908                        name=self.op.name,
8909                        relocate_from=list(self.relocate_from),
8910                        )
8911
8912     if self.op.direction == constants.IALLOCATOR_DIR_IN:
8913       result = ial.in_text
8914     else:
8915       ial.Run(self.op.allocator, validate=False)
8916       result = ial.out_text
8917     return result