# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
import logging
import re
import time

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by the opcode dry_run parameter)

  """
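
  # A minimal sketch of the contract described above; the LU name, hook
  # path and opcode are purely illustrative, not part of this module:
  #
  #   class LUDoNothing(LogicalUnit):
  #     HPATH = "do-nothing"
  #     HTYPE = constants.HTYPE_CLUSTER
  #     _OP_REQP = []
  #
  #     def ExpandNames(self):
  #       self.needed_locks = {}
  #
  #     def CheckPrereq(self):
  #       pass
  #
  #     def Exec(self, feedback_fn):
  #       feedback_fn("nothing to do")
  #
  #     def BuildHooksEnv(self):
  #       return {}, [], []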

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.cfg = context.cfg
    self.context = context
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()
116 """Returns the SshRunner object
120 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
123 ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    An empty node list should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # The API must be kept, thus we ignore the unused-argument and
    # "could be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we really have been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
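
  # Typical usage of the two helpers above in a concrete LU, as a sketch
  # (the LU body is hypothetical; the pattern mirrors the docstrings):
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     # node locks are computed later, once the instance lock is held
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()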


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
378 """Tasklet base class.
380 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
381 they can mix legacy code with tasklets. Locking needs to be done in the LU,
382 tasklets know nothing about locks.
384 Subclasses must follow these rules:
385 - Implement CheckPrereq
389 def __init__(self, lu):

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
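
# A sketch of how an LU might use tasklets instead of defining its own
# CheckPrereq and Exec; the tasklet class shown here is purely illustrative:
#
#   class _IllustrativeTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass  # verify preconditions, raise errors.OpPrereqError otherwise
#
#     def Exec(self, feedback_fn):
#       feedback_fn("doing one self-contained piece of work")
#
# and, inside some LU's ExpandNames:
#
#   self.tasklets = [_IllustrativeTasklet(self)]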


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @return: the list of nodes, sorted
  @raise errors.OpPrereqError: if the nodes parameter is of a wrong type
  @raise errors.ProgrammerError: if called with an empty list of nodes

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
                                 " non-empty list of nodes whose name is to be"
                                 " expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())

  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
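
# Example use of the expansion wrappers (the short and full names are
# illustrative): a user-supplied "node1" is canonicalized before locking:
#
#   self.op.node_name = _ExpandNodeName(self.cfg, "node1")
#   # -> e.g. "node1.example.com", or OpPrereqError if unknown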


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @param memory: the memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: the list of (size, mode) pairs
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @return: the hook environment for this instance

  """
626 "INSTANCE_NAME": name,
627 "INSTANCE_PRIMARY": primary_node,
628 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
629 "INSTANCE_OS_TYPE": os_type,
630 "INSTANCE_STATUS": str_status,
631 "INSTANCE_MEMORY": memory,
632 "INSTANCE_VCPUS": vcpus,
633 "INSTANCE_DISK_TEMPLATE": disk_template,
634 "INSTANCE_HYPERVISOR": hypervisor_name,

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
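
# Worked example for the decision above: with candidate_pool_size = 10 and
# GetMasterCandidateStats reporting mc_now = 3, mc_should = 3, the adjusted
# target is min(3 + 1, 10) = 4, so 3 < 4 and the new node promotes itself;
# with a full pool (cp_size = 3, mc_now = mc_should = 3) the target stays 3
# and no promotion happens.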


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                           profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _FormatTimestamp(secs):
  """Formats a Unix timestamp with the local timezone.

  """
  return time.strftime("%F %T %Z", time.gmtime(secs))


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
  """Verifies certificate details for LUVerifyCluster.

  """
  if expired:
    msg = "Certificate %s is expired" % filename

    if not_before is not None and not_after is not None:
      msg += (" (valid from %s to %s)" %
              (_FormatTimestamp(not_before),
               _FormatTimestamp(not_after)))
    elif not_before is not None:
      msg += " (valid from %s)" % _FormatTimestamp(not_before)
    elif not_after is not None:
      msg += " (valid until %s)" % _FormatTimestamp(not_after)

    return (LUVerifyCluster.ETYPE_ERROR, msg)

  elif not_before is not None and not_before > now:
    return (LUVerifyCluster.ETYPE_WARNING,
            "Certificate %s not yet valid (valid from %s)" %
            (filename, _FormatTimestamp(not_before)))

  elif not_after is not None:
    remaining_days = int((not_after - now) / (24 * 3600))

    msg = ("Certificate %s expires in %d days" % (filename, remaining_days))

    if remaining_days <= error_days:
      return (LUVerifyCluster.ETYPE_ERROR, msg)

    if remaining_days <= warn_days:
      return (LUVerifyCluster.ETYPE_WARNING, msg)

  return (None, None)
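
# Worked example for the expiry thresholds above: a certificate still valid
# for 10 more days gives remaining_days = 10; with illustrative thresholds
# error_days = 7 and warn_days = 30 this skips the error branch (10 > 7)
# but returns the ETYPE_WARNING tuple (10 <= 30).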


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  # Depending on the pyOpenSSL version, this can just return (None, None)
  (not_before, not_after) = utils.GetX509CertValidity(cert)

  return _VerifyCertificateInner(filename, cert.has_expired(),
                                 not_before, not_after, time.time())


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
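
  # Sample output of the two formats produced by _Error above (node and
  # message are illustrative): with op.error_codes set, a line like
  #   ERROR:ENODELVM:node:node1.example.com:volume group missing
  # is emitted; without it, the simpler form
  #   ERROR: node node1.example.com: volume group missing
  # is used, both prefixed with "  - " via self._feedback_fn.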

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the used DRBD minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, node_result should be a non-empty dict
    test = not node_result or not isinstance(node_result, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      test = not vglist
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      if not test:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # checks config file checksum
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if not test:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have = (file_name not in master_files) or node_is_mc
        test1 = file_name not in remote_cksum
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' missing", file_name)
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' has wrong checksum", file_name)
        # not candidate and this is not a must-have file
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist on non master"
                 " candidates (and the file is outdated)", file_name)
        # all good, except non-master/non-must have combination
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist"
                 " on non master candidates", file_name)

    test = constants.NV_NODELIST not in node_result
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if node_result[constants.NV_NODELIST]:
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in node_result
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if node_result[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, node_result[constants.NV_NODENETTEST][anode])

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      test = not isinstance(used_minors, (tuple, list))
      _ErrorIf(test, self.ENODEDRBD, node,
               "cannot parse drbd status file: %s", str(used_minors))

      for minor, (iname, must_exist) in drbd_map.items():
        test = minor not in used_minors and must_exist
        _ErrorIf(test, self.ENODEDRBD, node,
                 "drbd minor %d of instance %s is not active",
                 minor, iname)
      for minor in used_minors:
        test = minor not in drbd_map
        _ErrorIf(test, self.ENODEDRBD, node,
                 "unallocated drbd minor %d is in use", minor)

    test = node_result.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    if vg_name is not None:
      pvlist = node_result.get(constants.NV_PVLIST, None)
      test = pvlist is None
      _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
      if not test:
        # check that ':' is not present in PV names, since it's a
        # special character for lvcreate (denotes the range of PEs to
        # use on the PV)
        for _, pvname, owner_vg in pvlist:
          test = ":" in pvname
          _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                   " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        test = node not in node_vol_is or volume not in node_vol_is[node]
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      test = ((node_current not in node_instance or
               not instance in node_instance[node_current]) and
              node_current not in n_offline)
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node in node_instance:
      if node != node_current:
        test = instance in node_instance[node]
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node in node_vol_is:
      for volume in node_vol_is[node]:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_instance):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node in node_instance:
      for o_inst in node_instance[node]:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = nodeinfo['mfree'] < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should peer node %s fail", prinode)

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just run in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
    }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)
1428 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1429 node_verify_param = {
1430 constants.NV_FILELIST: file_names,
1431 constants.NV_NODELIST: [node.name for node in nodeinfo
1432 if not node.offline],
1433 constants.NV_HYPERVISOR: hypervisors,
1434 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1435 node.secondary_ip) for node in nodeinfo
1436 if not node.offline],
1437 constants.NV_INSTANCELIST: hypervisors,
1438 constants.NV_VERSION: None,
1439 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1440 constants.NV_NODESETUP: None,
1441 constants.NV_TIME: None,
1444 if vg_name is not None:
1445 node_verify_param[constants.NV_VGLIST] = None
1446 node_verify_param[constants.NV_LVLIST] = vg_name
1447 node_verify_param[constants.NV_PVLIST] = [vg_name]
1448 node_verify_param[constants.NV_DRBDLIST] = None

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        test = instance not in instanceinfo
        _ErrorIf(test, self.ECLUSTERCFG, None,
                 "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
        if test:
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)
      self._VerifyNode(node_i, file_names, local_checksums,
                       nresult, master_files, node_drbd, vg_name)

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
                 utils.SafeEncode(lvdata))
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
        continue
      else:
        node_volume[node] = lvdata

      idata = nresult.get(constants.NV_INSTANCELIST, None)
      test = not isinstance(idata, list)
      _ErrorIf(test, self.ENODEHV, node,
               "rpc call to node failed (instancelist): %s",
               utils.SafeEncode(str(idata)))
      if test:
        continue

      node_instance[node] = idata

      nodeinfo = nresult.get(constants.NV_HVINFO, None)
      test = not isinstance(nodeinfo, dict)
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
      if test:
        continue

      ntime = nresult.get(constants.NV_TIME, None)
      try:
        ntime_merged = utils.MergeTime(ntime)
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
        continue

      if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
      elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
      else:
        ntime_diff = None

      _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
               "Node time diverges by at least %s from master node time",
               ntime_diff)

      if ntime_diff is not None:
        continue
1562 "mfree": int(nodeinfo['memory_free']),
1565 # dictionary holding all instances this node is secondary for,
1566 # grouped by their primary node. Each key is a cluster node, and each
1567 # value is a list of instances which have the key as primary and the
1568 # current node as secondary. this is handy to calculate N+1 memory
1569 # availability if you can only failover from a primary to its
1571 "sinst-by-pnode": {},
1573 # FIXME: devise a free space model for file based instances as well
1574 if vg_name is not None:
1575 test = (constants.NV_VGLIST not in nresult or
1576 vg_name not in nresult[constants.NV_VGLIST])
1577 _ErrorIf(test, self.ENODELVM, node,
1578 "node didn't return data for the volume group '%s'"
1579 " - it is either missing or broken", vg_name)
1582 node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1583 except (ValueError, KeyError):
1584 _ErrorIf(True, self.ENODERPC, node,
1585 "node returned invalid nodeinfo, check lvm/hypervisor")

    node_vol_should = {}

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_volume,
                           node_instance, n_offline)
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               self.EINSTANCELAYOUT, instance,
               "instance has multiple secondary nodes", code="WARNING")

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        _ErrorIf(snode not in node_info and snode not in n_offline,
                 self.ENODERPC, snode,
                 "instance %s, connection to secondary node"
                 " failed", instance)

        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)

        if snode in n_offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
1647 feedback_fn("* Verifying orphan volumes")
1648 self._VerifyOrphanVolumes(node_vol_should, node_volume)
1650 feedback_fn("* Verifying remaining instances")
1651 self._VerifyOrphanInstances(instancelist, node_instance)
1653 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1654 feedback_fn("* Verifying N+1 Memory redundancy")
1655 self._VerifyNPlusOneMemory(node_info, instance_cfg)
1657 feedback_fn("* Other Notes")
1659 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
1660 % len(i_non_redundant))
1662 if i_non_a_balanced:
1663 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1664 % len(i_non_a_balanced))
1667 feedback_fn(" - NOTICE: %d offline node(s) found." % len(n_offline))
1670 feedback_fn(" - NOTICE: %d drained node(s) found." % len(n_drained))

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
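
  # Sketch of a possible return value from Exec above (all names are
  # illustrative): one unreachable node, one instance needing
  # activate-disks, one instance with a missing volume:
  #
  #   ({"node3.example.com": "rpc timeout"},
  #    ["instance1.example.com"],
  #    {"instance2.example.com": [("node2.example.com", "xenvg/lv1")]})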


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
      }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
      }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
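
  # Example of the fix above (sizes in MiB, values illustrative): a DRBD8
  # disk recorded with size 10240 whose data child reports 10112 gets the
  # child bumped to 10240, and the method returns True so the caller knows
  # the configuration must be updated.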
1873 def Exec(self, feedback_fn):
1874 """Verify the size of cluster disks.
1877 # TODO: check child disks too
1878 # TODO: check differences in size between primary/secondary nodes
1879 per_node_disks = {}
1880 for instance in self.wanted_instances:
1881 pnode = instance.primary_node
1882 if pnode not in per_node_disks:
1883 per_node_disks[pnode] = []
1884 for idx, disk in enumerate(instance.disks):
1885 per_node_disks[pnode].append((instance, idx, disk))
1887 changed = []
1888 for node, dskl in per_node_disks.items():
1889 newl = [v[2].Copy() for v in dskl]
1890 for dsk in newl:
1891 self.cfg.SetDiskID(dsk, node)
1892 result = self.rpc.call_blockdev_getsizes(node, newl)
1893 if result.fail_msg:
1894 self.LogWarning("Failure in blockdev_getsizes call to node"
1895 " %s, ignoring", node)
1896 continue
1897 if len(result.data) != len(dskl):
1898 self.LogWarning("Invalid result from node %s, ignoring node results",
1899 node)
1900 continue
1901 for ((instance, idx, disk), size) in zip(dskl, result.data):
1902 if size is None:
1903 self.LogWarning("Disk %d of instance %s did not return size"
1904 " information, ignoring", idx, instance.name)
1905 continue
1906 if not isinstance(size, (int, long)):
1907 self.LogWarning("Disk %d of instance %s did not return valid"
1908 " size information, ignoring", idx, instance.name)
1909 continue
1910 size = size >> 20
1911 if size != disk.size:
1912 self.LogInfo("Disk %d of instance %s has mismatched size,"
1913 " correcting: recorded %d, actual %d", idx,
1914 instance.name, disk.size, size)
1915 disk.size = size
1916 self.cfg.Update(instance, feedback_fn)
1917 changed.append((instance.name, idx, size))
1918 if self._EnsureChildSizes(disk):
1919 self.cfg.Update(instance, feedback_fn)
1920 changed.append((instance.name, idx, disk.size))
1921 return changed
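# A note on the ">> 20" in the reconstruction above: it assumes the node RPC
# reports sizes in bytes while the configuration stores MiB, e.g. a 10 GiB
# volume comes back as 10737418240, and 10737418240 >> 20 == 10240.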
1924 class LURenameCluster(LogicalUnit):
1925 """Rename the cluster.
1928 HPATH = "cluster-rename"
1929 HTYPE = constants.HTYPE_CLUSTER
1932 def BuildHooksEnv(self):
1937 "OP_TARGET": self.cfg.GetClusterName(),
1938 "NEW_NAME": self.op.name,
1940 mn = self.cfg.GetMasterNode()
1941 all_nodes = self.cfg.GetNodeList()
1942 return env, [mn], all_nodes
1944 def CheckPrereq(self):
1945 """Verify that the passed name is a valid one.
1948 hostname = utils.GetHostInfo(self.op.name)
1950 new_name = hostname.name
1951 self.ip = new_ip = hostname.ip
1952 old_name = self.cfg.GetClusterName()
1953 old_ip = self.cfg.GetMasterIP()
1954 if new_name == old_name and new_ip == old_ip:
1955 raise errors.OpPrereqError("Neither the name nor the IP address of the"
1956 " cluster has changed",
1958 if new_ip != old_ip:
1959 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1960 raise errors.OpPrereqError("The given cluster IP address (%s) is"
1961 " reachable on the network. Aborting." %
1962 new_ip, errors.ECODE_NOTUNIQUE)
1964 self.op.name = new_name
1966 def Exec(self, feedback_fn):
1967 """Rename the cluster.
1970 clustername = self.op.name
1973 # shutdown the master IP
1974 master = self.cfg.GetMasterNode()
1975 result = self.rpc.call_node_stop_master(master, False)
1976 result.Raise("Could not disable the master role")
1979 cluster = self.cfg.GetClusterInfo()
1980 cluster.cluster_name = clustername
1981 cluster.master_ip = ip
1982 self.cfg.Update(cluster, feedback_fn)
1984 # update the known hosts file
1985 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1986 node_list = self.cfg.GetNodeList()
1988 node_list.remove(master)
1991 result = self.rpc.call_upload_file(node_list,
1992 constants.SSH_KNOWN_HOSTS_FILE)
1993 for to_node, to_result in result.iteritems():
1994 msg = to_result.fail_msg
1995 if msg:
1996 msg = ("Copy of file %s to node %s failed: %s" %
1997 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1998 self.proc.LogWarning(msg)
2001 result = self.rpc.call_node_start_master(master, False, False)
2002 msg = result.fail_msg
2004 self.LogWarning("Could not re-enable the master role on"
2005 " the master, please restart manually: %s", msg)
2008 def _RecursiveCheckIfLVMBased(disk):
2009 """Check if the given disk or its children are lvm-based.
2011 @type disk: L{objects.Disk}
2012 @param disk: the disk to check
2014 @return: boolean indicating whether a LD_LV dev_type was found or not
2018 for chdisk in disk.children:
2019 if _RecursiveCheckIfLVMBased(chdisk):
2020 return True
2021 return disk.dev_type == constants.LD_LV
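# For illustration (not part of the original flow): the recursion above is a
# depth-first "any" over the disk tree; a minimal stand-in object shows the
# shape it expects (a dev_type plus a children list). The string values are
# assumed stand-ins for constants.LD_DRBD8 / constants.LD_LV.
class _DemoDisk(object):
  def __init__(self, dev_type, children=None):
    self.dev_type = dev_type
    self.children = children or []
# A DRBD8 device backed by LV children is reported as lvm-based, because the
# check descends into the children before looking at the device itself:
#   _RecursiveCheckIfLVMBased(_DemoDisk("drbd8", [_DemoDisk("lvm")])) -> True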
2024 class LUSetClusterParams(LogicalUnit):
2025 """Change the parameters of the cluster.
2028 HPATH = "cluster-modify"
2029 HTYPE = constants.HTYPE_CLUSTER
2033 def CheckArguments(self):
2037 if not hasattr(self.op, "candidate_pool_size"):
2038 self.op.candidate_pool_size = None
2039 if self.op.candidate_pool_size is not None:
2040 try:
2041 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2042 except (ValueError, TypeError), err:
2043 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2044 str(err), errors.ECODE_INVAL)
2045 if self.op.candidate_pool_size < 1:
2046 raise errors.OpPrereqError("At least one master candidate needed",
2049 def ExpandNames(self):
2050 # FIXME: in the future maybe other cluster params won't require checking on
2051 # all nodes to be modified.
2052 self.needed_locks = {
2053 locking.LEVEL_NODE: locking.ALL_SET,
2055 self.share_locks[locking.LEVEL_NODE] = 1
2057 def BuildHooksEnv(self):
2062 "OP_TARGET": self.cfg.GetClusterName(),
2063 "NEW_VG_NAME": self.op.vg_name,
2065 mn = self.cfg.GetMasterNode()
2066 return env, [mn], [mn]
2068 def CheckPrereq(self):
2069 """Check prerequisites.
2071 This checks whether the given params don't conflict and
2072 if the given volume group is valid.
2075 if self.op.vg_name is not None and not self.op.vg_name:
2076 instances = self.cfg.GetAllInstancesInfo().values()
2077 for inst in instances:
2078 for disk in inst.disks:
2079 if _RecursiveCheckIfLVMBased(disk):
2080 raise errors.OpPrereqError("Cannot disable lvm storage while"
2081 " lvm-based instances exist",
2084 node_list = self.acquired_locks[locking.LEVEL_NODE]
2086 # if vg_name not None, checks given volume group on all nodes
2088 vglist = self.rpc.call_vg_list(node_list)
2089 for node in node_list:
2090 msg = vglist[node].fail_msg
2092 # ignoring down node
2093 self.LogWarning("Error while gathering data on node %s"
2094 " (ignoring node): %s", node, msg)
2096 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2098 constants.MIN_VG_SIZE)
2100 raise errors.OpPrereqError("Error on node '%s': %s" %
2101 (node, vgstatus), errors.ECODE_ENVIRON)
2103 self.cluster = cluster = self.cfg.GetClusterInfo()
2104 # validate params changes
2105 if self.op.beparams:
2106 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2107 self.new_beparams = objects.FillDict(
2108 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2110 if self.op.nicparams:
2111 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2112 self.new_nicparams = objects.FillDict(
2113 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2114 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2117 # check all instances for consistency
2118 for instance in self.cfg.GetAllInstancesInfo().values():
2119 for nic_idx, nic in enumerate(instance.nics):
2120 params_copy = copy.deepcopy(nic.nicparams)
2121 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2123 # check parameter syntax
2125 objects.NIC.CheckParameterSyntax(params_filled)
2126 except errors.ConfigurationError, err:
2127 nic_errors.append("Instance %s, nic/%d: %s" %
2128 (instance.name, nic_idx, err))
2130 # if we're moving instances to routed, check that they have an ip
2131 target_mode = params_filled[constants.NIC_MODE]
2132 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2133 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2134 (instance.name, nic_idx))
2136 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2137 "\n".join(nic_errors))
2139 # hypervisor list/parameters
2140 self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2141 if self.op.hvparams:
2142 if not isinstance(self.op.hvparams, dict):
2143 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2145 for hv_name, hv_dict in self.op.hvparams.items():
2146 if hv_name not in self.new_hvparams:
2147 self.new_hvparams[hv_name] = hv_dict
2149 self.new_hvparams[hv_name].update(hv_dict)
2151 # os hypervisor parameters
2152 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2154 if not isinstance(self.op.os_hvp, dict):
2155 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2157 for os_name, hvs in self.op.os_hvp.items():
2158 if not isinstance(hvs, dict):
2159 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2160 " input"), errors.ECODE_INVAL)
2161 if os_name not in self.new_os_hvp:
2162 self.new_os_hvp[os_name] = hvs
2164 for hv_name, hv_dict in hvs.items():
2165 if hv_name not in self.new_os_hvp[os_name]:
2166 self.new_os_hvp[os_name][hv_name] = hv_dict
2168 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2170 if self.op.enabled_hypervisors is not None:
2171 self.hv_list = self.op.enabled_hypervisors
2172 if not self.hv_list:
2173 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2174 " least one member",
2176 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2177 if invalid_hvs:
2178 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2179 " entries: %s" %
2180 utils.CommaJoin(invalid_hvs),
2181 errors.ECODE_INVAL)
2182 else:
2183 self.hv_list = cluster.enabled_hypervisors
2185 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2186 # either the enabled list has changed, or the parameters have, validate
2187 for hv_name, hv_params in self.new_hvparams.items():
2188 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2189 (self.op.enabled_hypervisors and
2190 hv_name in self.op.enabled_hypervisors)):
2191 # either this is a new hypervisor, or its parameters have changed
2192 hv_class = hypervisor.GetHypervisor(hv_name)
2193 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2194 hv_class.CheckParameterSyntax(hv_params)
2195 _CheckHVParams(self, node_list, hv_name, hv_params)
2198 # no need to check any newly-enabled hypervisors, since the
2199 # defaults have already been checked in the above code-block
2200 for os_name, os_hvp in self.new_os_hvp.items():
2201 for hv_name, hv_params in os_hvp.items():
2202 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2203 # we need to fill in the new os_hvp on top of the actual hv_p
2204 cluster_defaults = self.new_hvparams.get(hv_name, {})
2205 new_osp = objects.FillDict(cluster_defaults, hv_params)
2206 hv_class = hypervisor.GetHypervisor(hv_name)
2207 hv_class.CheckParameterSyntax(new_osp)
2208 _CheckHVParams(self, node_list, hv_name, new_osp)
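# A worked example for the layering above (hypothetical values): with cluster
# defaults {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"} and an
# OS-level override {"root_path": "/dev/xvda1"}, the filled result is
# {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda1"}; the OS
# hypervisor parameters are thus always validated as complete dicts, layered
# on top of the (possibly also updated) cluster-wide hypervisor defaults.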
2211 def Exec(self, feedback_fn):
2212 """Change the parameters of the cluster.
2215 if self.op.vg_name is not None:
2216 new_volume = self.op.vg_name
2219 if new_volume != self.cfg.GetVGName():
2220 self.cfg.SetVGName(new_volume)
2222 feedback_fn("Cluster LVM configuration already in desired"
2223 " state, not changing")
2224 if self.op.hvparams:
2225 self.cluster.hvparams = self.new_hvparams
2227 self.cluster.os_hvp = self.new_os_hvp
2228 if self.op.enabled_hypervisors is not None:
2229 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2230 if self.op.beparams:
2231 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2232 if self.op.nicparams:
2233 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2235 if self.op.candidate_pool_size is not None:
2236 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2237 # we need to update the pool size here, otherwise the save will fail
2238 _AdjustCandidatePool(self, [])
2240 self.cfg.Update(self.cluster, feedback_fn)
2243 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2244 """Distribute additional files which are part of the cluster configuration.
2246 ConfigWriter takes care of distributing the config and ssconf files, but
2247 there are more files which should be distributed to all nodes. This function
2248 makes sure those are copied.
2250 @param lu: calling logical unit
2251 @param additional_nodes: list of nodes not in the config to distribute to
2254 # 1. Gather target nodes
2255 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2256 dist_nodes = lu.cfg.GetOnlineNodeList()
2257 if additional_nodes is not None:
2258 dist_nodes.extend(additional_nodes)
2259 if myself.name in dist_nodes:
2260 dist_nodes.remove(myself.name)
2262 # 2. Gather files to distribute
2263 dist_files = set([constants.ETC_HOSTS,
2264 constants.SSH_KNOWN_HOSTS_FILE,
2265 constants.RAPI_CERT_FILE,
2266 constants.RAPI_USERS_FILE,
2267 constants.HMAC_CLUSTER_KEY,
2270 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2271 for hv_name in enabled_hypervisors:
2272 hv_class = hypervisor.GetHypervisor(hv_name)
2273 dist_files.update(hv_class.GetAncillaryFiles())
2275 # 3. Perform the files upload
2276 for fname in dist_files:
2277 if os.path.exists(fname):
2278 result = lu.rpc.call_upload_file(dist_nodes, fname)
2279 for to_node, to_result in result.items():
2280 msg = to_result.fail_msg
2282 msg = ("Copy of file %s to node %s failed: %s" %
2283 (fname, to_node, msg))
2284 lu.proc.LogWarning(msg)
2287 class LURedistributeConfig(NoHooksLU):
2288 """Force the redistribution of cluster configuration.
2290 This is a very simple LU.
2296 def ExpandNames(self):
2297 self.needed_locks = {
2298 locking.LEVEL_NODE: locking.ALL_SET,
2300 self.share_locks[locking.LEVEL_NODE] = 1
2302 def CheckPrereq(self):
2303 """Check prerequisites.
2307 def Exec(self, feedback_fn):
2308 """Redistribute the configuration.
2311 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2312 _RedistributeAncillaryFiles(self)
2315 def _WaitForSync(lu, instance, oneshot=False):
2316 """Sleep and poll for an instance's disk to sync.
2319 if not instance.disks:
2323 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2325 node = instance.primary_node
2327 for dev in instance.disks:
2328 lu.cfg.SetDiskID(dev, node)
2330 # TODO: Convert to utils.Retry
2332 retries = 0
2333 degr_retries = 10 # in seconds, as we sleep 1 second each time
2334 while True:
2335 max_time = 0
2336 done = True
2337 cumul_degraded = False
2338 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2339 msg = rstats.fail_msg
2340 if msg:
2341 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2342 retries += 1
2343 if retries >= 10:
2344 raise errors.RemoteError("Can't contact node %s for mirror data,"
2345 " aborting." % node)
2346 time.sleep(6)
2347 continue
2348 rstats = rstats.payload
2350 for i, mstat in enumerate(rstats):
2351 if mstat is None:
2352 lu.LogWarning("Can't compute data for node %s/%s",
2353 node, instance.disks[i].iv_name)
2356 cumul_degraded = (cumul_degraded or
2357 (mstat.is_degraded and mstat.sync_percent is None))
2358 if mstat.sync_percent is not None:
2360 if mstat.estimated_time is not None:
2361 rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2362 max_time = mstat.estimated_time
2363 else:
2364 rem_time = "no time estimate"
2365 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2366 (instance.disks[i].iv_name, mstat.sync_percent,
2369 # if we're done but degraded, let's do a few small retries, to
2370 # make sure we see a stable and not transient situation; therefore
2371 # we force restart of the loop
2372 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2373 logging.info("Degraded disks found, %d retries left", degr_retries)
2374 degr_retries -= 1
2375 time.sleep(1)
2376 continue
2378 if done or oneshot:
2379 break
2381 time.sleep(min(60, max_time))
2384 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2385 return not cumul_degraded
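# For illustration (a reduced sketch, not part of the original flow): the
# waiting loop above boils down to this polling pattern, with a hypothetical
# is_synced callable returning (done, degraded, eta_seconds):
def _DemoPollUntilSynced(is_synced, sleep_fn, max_sleep=60):
  degraded_retries = 10
  while True:
    done, degraded, eta = is_synced()
    if done and degraded and degraded_retries > 0:
      degraded_retries -= 1
      sleep_fn(1)  # short sleep: the degraded state may be transient
      continue
    if done:
      return not degraded
    sleep_fn(min(max_sleep, eta or max_sleep))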
2388 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2389 """Check that mirrors are not degraded.
2391 The ldisk parameter, if True, will change the test from the
2392 is_degraded attribute (which represents overall non-ok status for
2393 the device(s)) to the ldisk (representing the local storage status).
2396 lu.cfg.SetDiskID(dev, node)
2398 result = True
2400 if on_primary or dev.AssembleOnSecondary():
2401 rstats = lu.rpc.call_blockdev_find(node, dev)
2402 msg = rstats.fail_msg
2403 if msg:
2404 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2405 result = False
2406 elif not rstats.payload:
2407 lu.LogWarning("Can't find disk on node %s", node)
2408 result = False
2409 else:
2410 if ldisk:
2411 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2412 else:
2413 result = result and not rstats.payload.is_degraded
2415 if dev.children:
2416 for child in dev.children:
2417 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2419 return result
2422 class LUDiagnoseOS(NoHooksLU):
2423 """Logical unit for OS diagnose/query.
2426 _OP_REQP = ["output_fields", "names"]
2428 _FIELDS_STATIC = utils.FieldSet()
2429 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2430 # Fields that need calculation of global os validity
2431 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2433 def ExpandNames(self):
2434 if self.op.names:
2435 raise errors.OpPrereqError("Selective OS query not supported",
2438 _CheckOutputFields(static=self._FIELDS_STATIC,
2439 dynamic=self._FIELDS_DYNAMIC,
2440 selected=self.op.output_fields)
2442 # Lock all nodes, in shared mode
2443 # Temporary removal of locks, should be reverted later
2444 # TODO: reintroduce locks when they are lighter-weight
2445 self.needed_locks = {}
2446 #self.share_locks[locking.LEVEL_NODE] = 1
2447 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2449 def CheckPrereq(self):
2450 """Check prerequisites.
2455 def _DiagnoseByOS(rlist):
2456 """Remaps a per-node return list into an a per-os per-node dictionary
2458 @param rlist: a map with node names as keys and OS objects as values
2461 @return: a dictionary with osnames as keys and as value another map, with
2462 nodes as keys and tuples of (path, status, diagnose) as values, eg::
2464 {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2465 (/srv/..., False, "invalid api")],
2466 "node2": [(/srv/..., True, "")]}
2471 # we build here the list of nodes that didn't fail the RPC (at RPC
2472 # level), so that nodes with a non-responding node daemon don't
2473 # make all OSes invalid
2474 good_nodes = [node_name for node_name in rlist
2475 if not rlist[node_name].fail_msg]
2476 for node_name, nr in rlist.items():
2477 if nr.fail_msg or not nr.payload:
2478 continue
2479 for name, path, status, diagnose, variants in nr.payload:
2480 if name not in all_os:
2481 # build a list of nodes for this os containing empty lists
2482 # for each node in node_list
2483 all_os[name] = {}
2484 for nname in good_nodes:
2485 all_os[name][nname] = []
2486 all_os[name][node_name].append((path, status, diagnose, variants))
2487 return all_os
2489 def Exec(self, feedback_fn):
2490 """Compute the list of OSes.
2493 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2494 node_data = self.rpc.call_os_diagnose(valid_nodes)
2495 pol = self._DiagnoseByOS(node_data)
2497 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2498 calc_variants = "variants" in self.op.output_fields
2500 for os_name, os_data in pol.items():
2505 for osl in os_data.values():
2506 valid = valid and osl and osl[0][1]
2511 node_variants = osl[0][3]
2512 if variants is None:
2513 variants = node_variants
2514 else:
2515 variants = [v for v in variants if v in node_variants]
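# A worked example for the intersection above: if node1 reports variants
# ["etch", "lenny"] and node2 reports ["lenny", "squeeze"], the successive
# list comprehension narrows variants to ["lenny"], so only variants
# available on every node survive into the query output.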
2517 for field in self.op.output_fields:
2518 if field == "name":
2519 val = os_name
2520 elif field == "valid":
2521 val = valid
2522 elif field == "node_status":
2523 # this is just a copy of the dict
2524 val = {}
2525 for node_name, nos_list in os_data.items():
2526 val[node_name] = nos_list
2527 elif field == "variants":
2528 val = variants
2529 else:
2530 raise errors.ParameterError(field)
2531 row.append(val)
2533 output.append(row)
2535 return output
2537 class LURemoveNode(LogicalUnit):
2538 """Logical unit for removing a node.
2541 HPATH = "node-remove"
2542 HTYPE = constants.HTYPE_NODE
2543 _OP_REQP = ["node_name"]
2545 def BuildHooksEnv(self):
2548 This doesn't run on the target node in the pre phase as a failed
2549 node would then be impossible to remove.
2553 "OP_TARGET": self.op.node_name,
2554 "NODE_NAME": self.op.node_name,
2556 all_nodes = self.cfg.GetNodeList()
2558 all_nodes.remove(self.op.node_name)
2560 logging.warning("Node %s which is about to be removed not found"
2561 " in the all nodes list", self.op.node_name)
2562 return env, all_nodes, all_nodes
2564 def CheckPrereq(self):
2565 """Check prerequisites.
2568 - the node exists in the configuration
2569 - it does not have primary or secondary instances
2570 - it's not the master
2572 Any errors are signaled by raising errors.OpPrereqError.
2575 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2576 node = self.cfg.GetNodeInfo(self.op.node_name)
2577 assert node is not None
2579 instance_list = self.cfg.GetInstanceList()
2581 masternode = self.cfg.GetMasterNode()
2582 if node.name == masternode:
2583 raise errors.OpPrereqError("Node is the master node,"
2584 " you need to failover first.",
2587 for instance_name in instance_list:
2588 instance = self.cfg.GetInstanceInfo(instance_name)
2589 if node.name in instance.all_nodes:
2590 raise errors.OpPrereqError("Instance %s is still running on the node,"
2591 " please remove first." % instance_name,
2593 self.op.node_name = node.name
2596 def Exec(self, feedback_fn):
2597 """Removes the node from the cluster.
2601 logging.info("Stopping the node daemon and removing configs from node %s",
2604 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2606 # Promote nodes to master candidate as needed
2607 _AdjustCandidatePool(self, exceptions=[node.name])
2608 self.context.RemoveNode(node.name)
2610 # Run post hooks on the node before it's removed
2611 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2612 try:
2613 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2614 except:
2615 # pylint: disable-msg=W0702
2616 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2618 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2619 msg = result.fail_msg
2621 self.LogWarning("Errors encountered on the remote node while leaving"
2622 " the cluster: %s", msg)
2625 class LUQueryNodes(NoHooksLU):
2626 """Logical unit for querying nodes.
2629 # pylint: disable-msg=W0142
2630 _OP_REQP = ["output_fields", "names", "use_locking"]
2633 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2634 "master_candidate", "offline", "drained"]
2636 _FIELDS_DYNAMIC = utils.FieldSet(
2638 "mtotal", "mnode", "mfree",
2640 "ctotal", "cnodes", "csockets",
2643 _FIELDS_STATIC = utils.FieldSet(*[
2644 "pinst_cnt", "sinst_cnt",
2645 "pinst_list", "sinst_list",
2646 "pip", "sip", "tags",
2648 "role"] + _SIMPLE_FIELDS
2651 def ExpandNames(self):
2652 _CheckOutputFields(static=self._FIELDS_STATIC,
2653 dynamic=self._FIELDS_DYNAMIC,
2654 selected=self.op.output_fields)
2656 self.needed_locks = {}
2657 self.share_locks[locking.LEVEL_NODE] = 1
2660 self.wanted = _GetWantedNodes(self, self.op.names)
2662 self.wanted = locking.ALL_SET
2664 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2665 self.do_locking = self.do_node_query and self.op.use_locking
2667 # if we don't request only static fields, we need to lock the nodes
2668 self.needed_locks[locking.LEVEL_NODE] = self.wanted
2670 def CheckPrereq(self):
2671 """Check prerequisites.
2674 # The validation of the node list is done in _GetWantedNodes if the
2675 # list is non-empty; if it's empty, there's nothing to validate
2678 def Exec(self, feedback_fn):
2679 """Computes the list of nodes and their attributes.
2682 all_info = self.cfg.GetAllNodesInfo()
2684 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2685 elif self.wanted != locking.ALL_SET:
2686 nodenames = self.wanted
2687 missing = set(nodenames).difference(all_info.keys())
2689 raise errors.OpExecError(
2690 "Some nodes were removed before retrieving their data: %s" % missing)
2692 nodenames = all_info.keys()
2694 nodenames = utils.NiceSort(nodenames)
2695 nodelist = [all_info[name] for name in nodenames]
2697 # begin data gathering
2699 if self.do_node_query:
2701 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2702 self.cfg.GetHypervisorType())
2703 for name in nodenames:
2704 nodeinfo = node_data[name]
2705 if not nodeinfo.fail_msg and nodeinfo.payload:
2706 nodeinfo = nodeinfo.payload
2707 fn = utils.TryConvert
2709 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2710 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2711 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2712 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2713 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2714 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2715 "bootid": nodeinfo.get('bootid', None),
2716 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2717 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2720 live_data[name] = {}
2722 live_data = dict.fromkeys(nodenames, {})
2724 node_to_primary = dict([(name, set()) for name in nodenames])
2725 node_to_secondary = dict([(name, set()) for name in nodenames])
2727 inst_fields = frozenset(("pinst_cnt", "pinst_list",
2728 "sinst_cnt", "sinst_list"))
2729 if inst_fields & frozenset(self.op.output_fields):
2730 inst_data = self.cfg.GetAllInstancesInfo()
2732 for inst in inst_data.values():
2733 if inst.primary_node in node_to_primary:
2734 node_to_primary[inst.primary_node].add(inst.name)
2735 for secnode in inst.secondary_nodes:
2736 if secnode in node_to_secondary:
2737 node_to_secondary[secnode].add(inst.name)
2739 master_node = self.cfg.GetMasterNode()
2741 # end data gathering
2743 output = []
2744 for node in nodelist:
2745 node_output = []
2746 for field in self.op.output_fields:
2747 if field in self._SIMPLE_FIELDS:
2748 val = getattr(node, field)
2749 elif field == "pinst_list":
2750 val = list(node_to_primary[node.name])
2751 elif field == "sinst_list":
2752 val = list(node_to_secondary[node.name])
2753 elif field == "pinst_cnt":
2754 val = len(node_to_primary[node.name])
2755 elif field == "sinst_cnt":
2756 val = len(node_to_secondary[node.name])
2757 elif field == "pip":
2758 val = node.primary_ip
2759 elif field == "sip":
2760 val = node.secondary_ip
2761 elif field == "tags":
2762 val = list(node.GetTags())
2763 elif field == "master":
2764 val = node.name == master_node
2765 elif self._FIELDS_DYNAMIC.Matches(field):
2766 val = live_data[node.name].get(field, None)
2767 elif field == "role":
2768 if node.name == master_node:
2770 elif node.master_candidate:
2779 raise errors.ParameterError(field)
2780 node_output.append(val)
2781 output.append(node_output)
2783 return output
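# An aside, illustrative only: the if/elif ladder above is this module's
# idiom for field dispatch; an equivalent table-driven sketch, shown here for
# a few of the static fields that need no live data:
_DEMO_NODE_FIELD_FN = {
  "pip": lambda node: node.primary_ip,
  "sip": lambda node: node.secondary_ip,
  "tags": lambda node: list(node.GetTags()),
}
# val = _DEMO_NODE_FIELD_FN[field](node) would then replace three branches.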
2786 class LUQueryNodeVolumes(NoHooksLU):
2787 """Logical unit for getting volumes on node(s).
2790 _OP_REQP = ["nodes", "output_fields"]
2792 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2793 _FIELDS_STATIC = utils.FieldSet("node")
2795 def ExpandNames(self):
2796 _CheckOutputFields(static=self._FIELDS_STATIC,
2797 dynamic=self._FIELDS_DYNAMIC,
2798 selected=self.op.output_fields)
2800 self.needed_locks = {}
2801 self.share_locks[locking.LEVEL_NODE] = 1
2802 if not self.op.nodes:
2803 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2805 self.needed_locks[locking.LEVEL_NODE] = \
2806 _GetWantedNodes(self, self.op.nodes)
2808 def CheckPrereq(self):
2809 """Check prerequisites.
2811 This checks that the fields required are valid output fields.
2814 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2816 def Exec(self, feedback_fn):
2817 """Computes the list of nodes and their attributes.
2820 nodenames = self.nodes
2821 volumes = self.rpc.call_node_volumes(nodenames)
2823 ilist = [self.cfg.GetInstanceInfo(iname) for iname
2824 in self.cfg.GetInstanceList()]
2826 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2828 output = []
2829 for node in nodenames:
2830 nresult = volumes[node]
2833 msg = nresult.fail_msg
2835 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2838 node_vols = nresult.payload[:]
2839 node_vols.sort(key=lambda vol: vol['dev'])
2841 for vol in node_vols:
2842 node_output = []
2843 for field in self.op.output_fields:
2846 elif field == "phys":
2850 elif field == "name":
2852 elif field == "size":
2853 val = int(float(vol['size']))
2854 elif field == "instance":
2856 if node not in lv_by_node[inst]:
2858 if vol['name'] in lv_by_node[inst][node]:
2864 raise errors.ParameterError(field)
2865 node_output.append(str(val))
2867 output.append(node_output)
2869 return output
2872 class LUQueryNodeStorage(NoHooksLU):
2873 """Logical unit for getting information on storage units on node(s).
2876 _OP_REQP = ["nodes", "storage_type", "output_fields"]
2878 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
2880 def ExpandNames(self):
2881 storage_type = self.op.storage_type
2883 if storage_type not in constants.VALID_STORAGE_TYPES:
2884 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2887 _CheckOutputFields(static=self._FIELDS_STATIC,
2888 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
2889 selected=self.op.output_fields)
2891 self.needed_locks = {}
2892 self.share_locks[locking.LEVEL_NODE] = 1
2895 self.needed_locks[locking.LEVEL_NODE] = \
2896 _GetWantedNodes(self, self.op.nodes)
2898 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2900 def CheckPrereq(self):
2901 """Check prerequisites.
2903 This checks that the fields required are valid output fields.
2906 self.op.name = getattr(self.op, "name", None)
2908 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2910 def Exec(self, feedback_fn):
2911 """Computes the list of nodes and their attributes.
2914 # Always get name to sort by
2915 if constants.SF_NAME in self.op.output_fields:
2916 fields = self.op.output_fields[:]
2918 fields = [constants.SF_NAME] + self.op.output_fields
2920 # Never ask for node or type as it's only known to the LU
2921 for extra in [constants.SF_NODE, constants.SF_TYPE]:
2922 while extra in fields:
2923 fields.remove(extra)
2925 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2926 name_idx = field_idx[constants.SF_NAME]
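# A worked example (hypothetical values): for output_fields == ["size"] the
# code above yields fields == ["name", "size"], hence
# field_idx == {"name": 0, "size": 1} and name_idx == 0; the name column is
# always fetched, even when the caller did not ask for it, purely so the
# result rows can be keyed and sorted by name below.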
2928 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2929 data = self.rpc.call_storage_list(self.nodes,
2930 self.op.storage_type, st_args,
2931 self.op.name, fields)
2935 for node in utils.NiceSort(self.nodes):
2936 nresult = data[node]
2940 msg = nresult.fail_msg
2942 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2945 rows = dict([(row[name_idx], row) for row in nresult.payload])
2947 for name in utils.NiceSort(rows.keys()):
2952 for field in self.op.output_fields:
2953 if field == constants.SF_NODE:
2955 elif field == constants.SF_TYPE:
2956 val = self.op.storage_type
2957 elif field in field_idx:
2958 val = row[field_idx[field]]
2960 raise errors.ParameterError(field)
2969 class LUModifyNodeStorage(NoHooksLU):
2970 """Logical unit for modifying a storage volume on a node.
2973 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2976 def CheckArguments(self):
2977 self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
2979 storage_type = self.op.storage_type
2980 if storage_type not in constants.VALID_STORAGE_TYPES:
2981 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2984 def ExpandNames(self):
2985 self.needed_locks = {
2986 locking.LEVEL_NODE: self.op.node_name,
2989 def CheckPrereq(self):
2990 """Check prerequisites.
2993 storage_type = self.op.storage_type
2996 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2998 raise errors.OpPrereqError("Storage units of type '%s' can not be"
2999 " modified" % storage_type,
3002 diff = set(self.op.changes.keys()) - modifiable
3004 raise errors.OpPrereqError("The following fields can not be modified for"
3005 " storage units of type '%s': %r" %
3006 (storage_type, list(diff)),
3009 def Exec(self, feedback_fn):
3010 """Computes the list of nodes and their attributes.
3013 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3014 result = self.rpc.call_storage_modify(self.op.node_name,
3015 self.op.storage_type, st_args,
3016 self.op.name, self.op.changes)
3017 result.Raise("Failed to modify storage unit '%s' on %s" %
3018 (self.op.name, self.op.node_name))
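# A usage note (assumed values, for illustration): a typical opcode for this
# LU would carry storage_type == constants.ST_LVM_PV and something like
# changes == {constants.SF_ALLOCATABLE: True}; any field outside the
# MODIFIABLE_STORAGE_FIELDS whitelist for that storage type is rejected in
# CheckPrereq before the RPC is ever issued.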
3021 class LUAddNode(LogicalUnit):
3022 """Logical unit for adding node to the cluster.
3026 HTYPE = constants.HTYPE_NODE
3027 _OP_REQP = ["node_name"]
3029 def CheckArguments(self):
3030 # validate/normalize the node name
3031 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3033 def BuildHooksEnv(self):
3036 This will run on all nodes before, and on all nodes + the new node after.
3040 "OP_TARGET": self.op.node_name,
3041 "NODE_NAME": self.op.node_name,
3042 "NODE_PIP": self.op.primary_ip,
3043 "NODE_SIP": self.op.secondary_ip,
3045 nodes_0 = self.cfg.GetNodeList()
3046 nodes_1 = nodes_0 + [self.op.node_name, ]
3047 return env, nodes_0, nodes_1
3049 def CheckPrereq(self):
3050 """Check prerequisites.
3053 - the new node is not already in the config
3055 - its parameters (single/dual homed) matches the cluster
3057 Any errors are signaled by raising errors.OpPrereqError.
3060 node_name = self.op.node_name
3063 dns_data = utils.GetHostInfo(node_name)
3065 node = dns_data.name
3066 primary_ip = self.op.primary_ip = dns_data.ip
3067 secondary_ip = getattr(self.op, "secondary_ip", None)
3068 if secondary_ip is None:
3069 secondary_ip = primary_ip
3070 if not utils.IsValidIP(secondary_ip):
3071 raise errors.OpPrereqError("Invalid secondary IP given",
3073 self.op.secondary_ip = secondary_ip
3075 node_list = cfg.GetNodeList()
3076 if not self.op.readd and node in node_list:
3077 raise errors.OpPrereqError("Node %s is already in the configuration" %
3078 node, errors.ECODE_EXISTS)
3079 elif self.op.readd and node not in node_list:
3080 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3083 for existing_node_name in node_list:
3084 existing_node = cfg.GetNodeInfo(existing_node_name)
3086 if self.op.readd and node == existing_node_name:
3087 if (existing_node.primary_ip != primary_ip or
3088 existing_node.secondary_ip != secondary_ip):
3089 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3090 " address configuration as before",
3094 if (existing_node.primary_ip == primary_ip or
3095 existing_node.secondary_ip == primary_ip or
3096 existing_node.primary_ip == secondary_ip or
3097 existing_node.secondary_ip == secondary_ip):
3098 raise errors.OpPrereqError("New node ip address(es) conflict with"
3099 " existing node %s" % existing_node.name,
3100 errors.ECODE_NOTUNIQUE)
3102 # check that the type of the node (single versus dual homed) is the
3103 # same as for the master
3104 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3105 master_singlehomed = myself.secondary_ip == myself.primary_ip
3106 newbie_singlehomed = secondary_ip == primary_ip
3107 if master_singlehomed != newbie_singlehomed:
3108 if master_singlehomed:
3109 raise errors.OpPrereqError("The master has no private ip but the"
3110 " new node has one",
3113 raise errors.OpPrereqError("The master has a private ip but the"
3114 " new node doesn't have one",
3117 # checks reachability
3118 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3119 raise errors.OpPrereqError("Node not reachable by ping",
3120 errors.ECODE_ENVIRON)
3122 if not newbie_singlehomed:
3123 # check reachability from my secondary ip to newbie's secondary ip
3124 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3125 source=myself.secondary_ip):
3126 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3127 " based ping to noded port",
3128 errors.ECODE_ENVIRON)
3135 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3138 self.new_node = self.cfg.GetNodeInfo(node)
3139 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3141 self.new_node = objects.Node(name=node,
3142 primary_ip=primary_ip,
3143 secondary_ip=secondary_ip,
3144 master_candidate=self.master_candidate,
3145 offline=False, drained=False)
3147 def Exec(self, feedback_fn):
3148 """Adds the new node to the cluster.
3151 new_node = self.new_node
3152 node = new_node.name
3154 # for re-adds, reset the offline/drained/master-candidate flags;
3155 # we need to reset here, otherwise offline would prevent RPC calls
3156 # later in the procedure; this also means that if the re-add
3157 # fails, we are left with a non-offlined, broken node
3159 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3160 self.LogInfo("Readding a node, the offline/drained flags were reset")
3161 # if we demote the node, we do cleanup later in the procedure
3162 new_node.master_candidate = self.master_candidate
3164 # notify the user about any possible mc promotion
3165 if new_node.master_candidate:
3166 self.LogInfo("Node will be a master candidate")
3168 # check connectivity
3169 result = self.rpc.call_version([node])[node]
3170 result.Raise("Can't get version information from node %s" % node)
3171 if constants.PROTOCOL_VERSION == result.payload:
3172 logging.info("Communication to node %s fine, sw version %s match",
3173 node, result.payload)
3175 raise errors.OpExecError("Version mismatch master version %s,"
3176 " node version %s" %
3177 (constants.PROTOCOL_VERSION, result.payload))
3180 if self.cfg.GetClusterInfo().modify_ssh_setup:
3181 logging.info("Copy ssh key to node %s", node)
3182 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3184 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3185 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3186 priv_key, pub_key]
3187 keyarray = []
3188 for i in keyfiles:
3189 keyarray.append(utils.ReadFile(i))
3191 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3192 keyarray[2], keyarray[3], keyarray[4],
3194 result.Raise("Cannot transfer ssh keys to the new node")
3196 # Add node to our /etc/hosts, and add key to known_hosts
3197 if self.cfg.GetClusterInfo().modify_etc_hosts:
3198 utils.AddHostToEtcHosts(new_node.name)
3200 if new_node.secondary_ip != new_node.primary_ip:
3201 result = self.rpc.call_node_has_ip_address(new_node.name,
3202 new_node.secondary_ip)
3203 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3204 prereq=True, ecode=errors.ECODE_ENVIRON)
3205 if not result.payload:
3206 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3207 " you gave (%s). Please fix and re-run this"
3208 " command." % new_node.secondary_ip)
3210 node_verify_list = [self.cfg.GetMasterNode()]
3211 node_verify_param = {
3212 constants.NV_NODELIST: [node],
3213 # TODO: do a node-net-test as well?
3216 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3217 self.cfg.GetClusterName())
3218 for verifier in node_verify_list:
3219 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3220 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3221 if nl_payload:
3222 for failed in nl_payload:
3223 feedback_fn("ssh/hostname verification failed"
3224 " (checking from %s): %s" %
3225 (verifier, nl_payload[failed]))
3226 raise errors.OpExecError("ssh/hostname verification failed.")
3229 _RedistributeAncillaryFiles(self)
3230 self.context.ReaddNode(new_node)
3231 # make sure we redistribute the config
3232 self.cfg.Update(new_node, feedback_fn)
3233 # and make sure the new node will not have old files around
3234 if not new_node.master_candidate:
3235 result = self.rpc.call_node_demote_from_mc(new_node.name)
3236 msg = result.fail_msg
3238 self.LogWarning("Node failed to demote itself from master"
3239 " candidate status: %s" % msg)
3241 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3242 self.context.AddNode(new_node, self.proc.GetECId())
3245 class LUSetNodeParams(LogicalUnit):
3246 """Modifies the parameters of a node.
3249 HPATH = "node-modify"
3250 HTYPE = constants.HTYPE_NODE
3251 _OP_REQP = ["node_name"]
3254 def CheckArguments(self):
3255 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3256 _CheckBooleanOpField(self.op, 'master_candidate')
3257 _CheckBooleanOpField(self.op, 'offline')
3258 _CheckBooleanOpField(self.op, 'drained')
3259 _CheckBooleanOpField(self.op, 'auto_promote')
3260 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3261 if all_mods.count(None) == 3:
3262 raise errors.OpPrereqError("Please pass at least one modification",
3264 if all_mods.count(True) > 1:
3265 raise errors.OpPrereqError("Can't set the node into more than one"
3266 " state at the same time",
3269 # Boolean value that tells us whether we're offlining or draining the node
3270 self.offline_or_drain = (self.op.offline == True or
3271 self.op.drained == True)
3272 self.deoffline_or_drain = (self.op.offline == False or
3273 self.op.drained == False)
3274 self.might_demote = (self.op.master_candidate == False or
3275 self.offline_or_drain)
3277 self.lock_all = self.op.auto_promote and self.might_demote
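# A worked example for the flags above: demoting a master candidate
# (master_candidate == False, offline and drained left as None) gives
# offline_or_drain == False but might_demote == True, so all node locks are
# taken only if auto_promote was also passed; without it, CheckPrereq below
# fails early when the candidate pool would become too small.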
3280 def ExpandNames(self):
3282 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3284 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3286 def BuildHooksEnv(self):
3289 This runs on the master node.
3293 "OP_TARGET": self.op.node_name,
3294 "MASTER_CANDIDATE": str(self.op.master_candidate),
3295 "OFFLINE": str(self.op.offline),
3296 "DRAINED": str(self.op.drained),
3298 nl = [self.cfg.GetMasterNode(),
3302 def CheckPrereq(self):
3303 """Check prerequisites.
3305 This checks the node's current state against the requested flag changes.
3308 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3310 if (self.op.master_candidate is not None or
3311 self.op.drained is not None or
3312 self.op.offline is not None):
3313 # we can't change the master's node flags
3314 if self.op.node_name == self.cfg.GetMasterNode():
3315 raise errors.OpPrereqError("The master role can be changed"
3316 " only via masterfailover",
3320 if node.master_candidate and self.might_demote and not self.lock_all:
3321 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3322 # check if after removing the current node, we're missing master
3324 (mc_remaining, mc_should, _) = \
3325 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3326 if mc_remaining != mc_should:
3327 raise errors.OpPrereqError("Not enough master candidates, please"
3328 " pass auto_promote to allow promotion",
3331 if (self.op.master_candidate == True and
3332 ((node.offline and not self.op.offline == False) or
3333 (node.drained and not self.op.drained == False))):
3334 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3335 " to master_candidate" % node.name,
3338 # If we're being deofflined/drained, we'll MC ourself if needed
3339 if (self.deoffline_or_drain and not self.offline_or_drain and not
3340 self.op.master_candidate == True and not node.master_candidate):
3341 self.op.master_candidate = _DecideSelfPromotion(self)
3342 if self.op.master_candidate:
3343 self.LogInfo("Autopromoting node to master candidate")
3347 def Exec(self, feedback_fn):
3356 if self.op.offline is not None:
3357 node.offline = self.op.offline
3358 result.append(("offline", str(self.op.offline)))
3359 if self.op.offline == True:
3360 if node.master_candidate:
3361 node.master_candidate = False
3363 result.append(("master_candidate", "auto-demotion due to offline"))
3364 if node.drained:
3365 node.drained = False
3366 result.append(("drained", "clear drained status due to offline"))
3368 if self.op.master_candidate is not None:
3369 node.master_candidate = self.op.master_candidate
3371 result.append(("master_candidate", str(self.op.master_candidate)))
3372 if self.op.master_candidate == False:
3373 rrc = self.rpc.call_node_demote_from_mc(node.name)
3374 msg = rrc.fail_msg
3375 if msg:
3376 self.LogWarning("Node failed to demote itself: %s" % msg)
3378 if self.op.drained is not None:
3379 node.drained = self.op.drained
3380 result.append(("drained", str(self.op.drained)))
3381 if self.op.drained == True:
3382 if node.master_candidate:
3383 node.master_candidate = False
3385 result.append(("master_candidate", "auto-demotion due to drain"))
3386 rrc = self.rpc.call_node_demote_from_mc(node.name)
3387 msg = rrc.fail_msg
3388 if msg:
3389 self.LogWarning("Node failed to demote itself: %s" % msg)
3390 if node.offline:
3391 node.offline = False
3392 result.append(("offline", "clear offline status due to drain"))
3394 # we locked all nodes, we adjust the CP before updating this node
3395 if self.lock_all:
3396 _AdjustCandidatePool(self, [node.name])
3398 # this will trigger configuration file update, if needed
3399 self.cfg.Update(node, feedback_fn)
3401 # this will trigger job queue propagation or cleanup
3403 self.context.ReaddNode(node)
3408 class LUPowercycleNode(NoHooksLU):
3409 """Powercycles a node.
3412 _OP_REQP = ["node_name", "force"]
3415 def CheckArguments(self):
3416 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3417 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3418 raise errors.OpPrereqError("The node is the master and the force"
3419 " parameter was not set",
3422 def ExpandNames(self):
3423 """Locking for PowercycleNode.
3425 This is a last-resort option and shouldn't block on other
3426 jobs. Therefore, we grab no locks.
3429 self.needed_locks = {}
3431 def CheckPrereq(self):
3432 """Check prerequisites.
3434 This LU has no prereqs.
3439 def Exec(self, feedback_fn):
3443 result = self.rpc.call_node_powercycle(self.op.node_name,
3444 self.cfg.GetHypervisorType())
3445 result.Raise("Failed to schedule the reboot")
3446 return result.payload
3449 class LUQueryClusterInfo(NoHooksLU):
3450 """Query cluster configuration.
3456 def ExpandNames(self):
3457 self.needed_locks = {}
3459 def CheckPrereq(self):
3460 """No prerequsites needed for this LU.
3465 def Exec(self, feedback_fn):
3466 """Return cluster config.
3469 cluster = self.cfg.GetClusterInfo()
3471 os_hvp = {}
3472 # Filter just for enabled hypervisors
3473 for os_name, hv_dict in cluster.os_hvp.items():
3474 os_hvp[os_name] = {}
3475 for hv_name, hv_params in hv_dict.items():
3476 if hv_name in cluster.enabled_hypervisors:
3477 os_hvp[os_name][hv_name] = hv_params
3479 result = {
3480 "software_version": constants.RELEASE_VERSION,
3481 "protocol_version": constants.PROTOCOL_VERSION,
3482 "config_version": constants.CONFIG_VERSION,
3483 "os_api_version": max(constants.OS_API_VERSIONS),
3484 "export_version": constants.EXPORT_VERSION,
3485 "architecture": (platform.architecture()[0], platform.machine()),
3486 "name": cluster.cluster_name,
3487 "master": cluster.master_node,
3488 "default_hypervisor": cluster.enabled_hypervisors[0],
3489 "enabled_hypervisors": cluster.enabled_hypervisors,
3490 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3491 for hypervisor_name in cluster.enabled_hypervisors]),
3493 "beparams": cluster.beparams,
3494 "nicparams": cluster.nicparams,
3495 "candidate_pool_size": cluster.candidate_pool_size,
3496 "master_netdev": cluster.master_netdev,
3497 "volume_group_name": cluster.volume_group_name,
3498 "file_storage_dir": cluster.file_storage_dir,
3499 "ctime": cluster.ctime,
3500 "mtime": cluster.mtime,
3501 "uuid": cluster.uuid,
3502 "tags": list(cluster.GetTags()),
3508 class LUQueryConfigValues(NoHooksLU):
3509 """Return configuration values.
3514 _FIELDS_DYNAMIC = utils.FieldSet()
3515 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3518 def ExpandNames(self):
3519 self.needed_locks = {}
3521 _CheckOutputFields(static=self._FIELDS_STATIC,
3522 dynamic=self._FIELDS_DYNAMIC,
3523 selected=self.op.output_fields)
3525 def CheckPrereq(self):
3526 """No prerequisites.
3531 def Exec(self, feedback_fn):
3532 """Dump a representation of the cluster config to the standard output.
3536 for field in self.op.output_fields:
3537 if field == "cluster_name":
3538 entry = self.cfg.GetClusterName()
3539 elif field == "master_node":
3540 entry = self.cfg.GetMasterNode()
3541 elif field == "drain_flag":
3542 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3543 elif field == "watcher_pause":
3544 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3546 raise errors.ParameterError(field)
3547 values.append(entry)
3549 return values
3551 class LUActivateInstanceDisks(NoHooksLU):
3552 """Bring up an instance's disks.
3555 _OP_REQP = ["instance_name"]
3558 def ExpandNames(self):
3559 self._ExpandAndLockInstance()
3560 self.needed_locks[locking.LEVEL_NODE] = []
3561 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3563 def DeclareLocks(self, level):
3564 if level == locking.LEVEL_NODE:
3565 self._LockInstancesNodes()
3567 def CheckPrereq(self):
3568 """Check prerequisites.
3570 This checks that the instance is in the cluster.
3573 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3574 assert self.instance is not None, \
3575 "Cannot retrieve locked instance %s" % self.op.instance_name
3576 _CheckNodeOnline(self, self.instance.primary_node)
3577 if not hasattr(self.op, "ignore_size"):
3578 self.op.ignore_size = False
3580 def Exec(self, feedback_fn):
3581 """Activate the disks.
3584 disks_ok, disks_info = \
3585 _AssembleInstanceDisks(self, self.instance,
3586 ignore_size=self.op.ignore_size)
3587 if not disks_ok:
3588 raise errors.OpExecError("Cannot activate block devices")
3590 return disks_info
3593 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3595 """Prepare the block devices for an instance.
3597 This sets up the block devices on all nodes.
3599 @type lu: L{LogicalUnit}
3600 @param lu: the logical unit on whose behalf we execute
3601 @type instance: L{objects.Instance}
3602 @param instance: the instance for whose disks we assemble
3603 @type ignore_secondaries: boolean
3604 @param ignore_secondaries: if true, errors on secondary nodes
3605 won't result in an error return from the function
3606 @type ignore_size: boolean
3607 @param ignore_size: if true, the current known size of the disk
3608 will not be used during the disk activation, useful for cases
3609 when the size is wrong
3610 @return: False if the operation failed, otherwise a list of
3611 (host, instance_visible_name, node_visible_name)
3612 with the mapping from node devices to instance devices
3615 device_info = []
3616 disks_ok = True
3617 iname = instance.name
3618 # With the two passes mechanism we try to reduce the window of
3619 # opportunity for the race condition of switching DRBD to primary
3620 # before handshaking occurred, but we do not eliminate it
3622 # The proper fix would be to wait (with some limits) until the
3623 # connection has been made and drbd transitions from WFConnection
3624 # into any other network-connected state (Connected, SyncTarget,
3627 # 1st pass, assemble on all nodes in secondary mode
3628 for inst_disk in instance.disks:
3629 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3630 if ignore_size:
3631 node_disk = node_disk.Copy()
3632 node_disk.UnsetSize()
3633 lu.cfg.SetDiskID(node_disk, node)
3634 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3635 msg = result.fail_msg
3636 if msg:
3637 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3638 " (is_primary=False, pass=1): %s",
3639 inst_disk.iv_name, node, msg)
3640 if not ignore_secondaries:
3641 disks_ok = False
3643 # FIXME: race condition on drbd migration to primary
3645 # 2nd pass, do only the primary node
3646 for inst_disk in instance.disks:
3647 dev_path = None
3649 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3650 if node != instance.primary_node:
3651 continue
3652 if ignore_size:
3653 node_disk = node_disk.Copy()
3654 node_disk.UnsetSize()
3655 lu.cfg.SetDiskID(node_disk, node)
3656 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3657 msg = result.fail_msg
3658 if msg:
3659 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3660 " (is_primary=True, pass=2): %s",
3661 inst_disk.iv_name, node, msg)
3662 disks_ok = False
3663 else:
3664 dev_path = result.payload
3666 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3668 # leave the disks configured for the primary node
3669 # this is a workaround that would be fixed better by
3670 # improving the logical/physical id handling
3671 for disk in instance.disks:
3672 lu.cfg.SetDiskID(disk, instance.primary_node)
3674 return disks_ok, device_info
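# For illustration (not part of the original flow): the two-pass scheme
# above, stripped to its essence with a hypothetical assemble_fn(node,
# as_primary) callable. Every node first brings the device up as secondary,
# and only then is it re-assembled as primary on the primary node, so the
# DRBD peers can handshake before any promotion happens.
def _DemoTwoPassAssemble(all_nodes, primary_node, assemble_fn):
  for node in all_nodes:
    assemble_fn(node, False)       # pass 1: secondary everywhere
  assemble_fn(primary_node, True)  # pass 2: promote on the primary only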
3677 def _StartInstanceDisks(lu, instance, force):
3678 """Start the disks of an instance.
3681 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3682 ignore_secondaries=force)
3684 _ShutdownInstanceDisks(lu, instance)
3685 if force is not None and not force:
3686 lu.proc.LogWarning("", hint="If the message above refers to a"
3688 " you can retry the operation using '--force'.")
3689 raise errors.OpExecError("Disk consistency error")
3692 class LUDeactivateInstanceDisks(NoHooksLU):
3693 """Shutdown an instance's disks.
3696 _OP_REQP = ["instance_name"]
3699 def ExpandNames(self):
3700 self._ExpandAndLockInstance()
3701 self.needed_locks[locking.LEVEL_NODE] = []
3702 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3704 def DeclareLocks(self, level):
3705 if level == locking.LEVEL_NODE:
3706 self._LockInstancesNodes()
3708 def CheckPrereq(self):
3709 """Check prerequisites.
3711 This checks that the instance is in the cluster.
3714 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3715 assert self.instance is not None, \
3716 "Cannot retrieve locked instance %s" % self.op.instance_name
3718 def Exec(self, feedback_fn):
3719 """Deactivate the disks
3722 instance = self.instance
3723 _SafeShutdownInstanceDisks(self, instance)
3726 def _SafeShutdownInstanceDisks(lu, instance):
3727 """Shutdown block devices of an instance.
3729 This function checks if an instance is running, before calling
3730 _ShutdownInstanceDisks.
3733 pnode = instance.primary_node
3734 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
3735 ins_l.Raise("Can't contact node %s" % pnode)
3737 if instance.name in ins_l.payload:
3738 raise errors.OpExecError("Instance is running, can't shutdown"
3741 _ShutdownInstanceDisks(lu, instance)
3744 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3745 """Shutdown block devices of an instance.
3747 This does the shutdown on all nodes of the instance.
3749 If ignore_primary is true, errors on the primary node are
3750 ignored; otherwise they cause the function to return False.
3753 all_result = True
3754 for disk in instance.disks:
3755 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3756 lu.cfg.SetDiskID(top_disk, node)
3757 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3758 msg = result.fail_msg
3760 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3761 disk.iv_name, node, msg)
3762 if not ignore_primary or node != instance.primary_node:
3763 all_result = False
3765 return all_result
3767 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3768 """Checks if a node has enough free memory.
3770 This function checks if a given node has the needed amount of free
3771 memory. If the node has less memory, or if we cannot get the
3772 information from the node, this function raises an OpPrereqError.
3775 @type lu: C{LogicalUnit}
3776 @param lu: a logical unit from which we get configuration data
3778 @param node: the node to check
3779 @type reason: C{str}
3780 @param reason: string to use in the error message
3781 @type requested: C{int}
3782 @param requested: the amount of memory in MiB to check for
3783 @type hypervisor_name: C{str}
3784 @param hypervisor_name: the hypervisor to ask for memory stats
3785 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3786 we cannot check the node
3789 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3790 nodeinfo[node].Raise("Can't get data from node %s" % node,
3791 prereq=True, ecode=errors.ECODE_ENVIRON)
3792 free_mem = nodeinfo[node].payload.get('memory_free', None)
3793 if not isinstance(free_mem, int):
3794 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3795 " was '%s'" % (node, free_mem),
3796 errors.ECODE_ENVIRON)
3797 if requested > free_mem:
3798 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3799 " needed %s MiB, available %s MiB" %
3800 (node, reason, requested, free_mem),
3801 errors.ECODE_NORES)
3804 def _CheckNodesFreeDisk(lu, nodenames, requested):
3805 """Checks if nodes have enough free disk space in the default VG.
3807 This function checks if all given nodes have the needed amount of
3808 free disk. If any node has less disk, or if we cannot get the
3809 information from the node, this function raises an OpPrereqError.
3812 @type lu: C{LogicalUnit}
3813 @param lu: a logical unit from which we get configuration data
3814 @type nodenames: C{list}
3815 @param nodenames: the list of node names to check
3816 @type requested: C{int}
3817 @param requested: the amount of disk in MiB to check for
3818 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
3819 we cannot check the node
3822 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3823 lu.cfg.GetHypervisorType())
3824 for node in nodenames:
3825 info = nodeinfo[node]
3826 info.Raise("Cannot get current information from node %s" % node,
3827 prereq=True, ecode=errors.ECODE_ENVIRON)
3828 vg_free = info.payload.get("vg_free", None)
3829 if not isinstance(vg_free, int):
3830 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
3831 " result was '%s'" % (node, vg_free),
3832 errors.ECODE_ENVIRON)
3833 if requested > vg_free:
3834 raise errors.OpPrereqError("Not enough disk space on target node %s:"
3835 " required %d MiB, available %d MiB" %
3836 (node, requested, vg_free),
3837 errors.ECODE_NORES)
3840 class LUStartupInstance(LogicalUnit):
3841 """Starts an instance.
3844 HPATH = "instance-start"
3845 HTYPE = constants.HTYPE_INSTANCE
3846 _OP_REQP = ["instance_name", "force"]
3849 def ExpandNames(self):
3850 self._ExpandAndLockInstance()
3852 def BuildHooksEnv(self):
3855 This runs on master, primary and secondary nodes of the instance.
3859 "FORCE": self.op.force,
3861 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3862 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3865 def CheckPrereq(self):
3866 """Check prerequisites.
3868 This checks that the instance is in the cluster.
3871 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3872 assert self.instance is not None, \
3873 "Cannot retrieve locked instance %s" % self.op.instance_name
3876 self.beparams = getattr(self.op, "beparams", {})
3878 if not isinstance(self.beparams, dict):
3879 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3880 " dict" % (type(self.beparams), ),
3882 # fill the beparams dict
3883 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3884 self.op.beparams = self.beparams
3887 self.hvparams = getattr(self.op, "hvparams", {})
3889 if not isinstance(self.hvparams, dict):
3890 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3891 " dict" % (type(self.hvparams), ),
3894 # check hypervisor parameter syntax (locally)
3895 cluster = self.cfg.GetClusterInfo()
3896 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3897     filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3898                                   instance.hvparams)
3899 filled_hvp.update(self.hvparams)
3900 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3901 hv_type.CheckParameterSyntax(filled_hvp)
3902 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3903 self.op.hvparams = self.hvparams
3905 _CheckNodeOnline(self, instance.primary_node)
3907 bep = self.cfg.GetClusterInfo().FillBE(instance)
3908 # check bridges existence
3909 _CheckInstanceBridgesExist(self, instance)
3911     remote_info = self.rpc.call_instance_info(instance.primary_node,
3912                                               instance.name,
3913 instance.hypervisor)
3914 remote_info.Raise("Error checking node %s" % instance.primary_node,
3915 prereq=True, ecode=errors.ECODE_ENVIRON)
3916 if not remote_info.payload: # not running already
3917 _CheckNodeFreeMemory(self, instance.primary_node,
3918 "starting instance %s" % instance.name,
3919 bep[constants.BE_MEMORY], instance.hypervisor)
3921 def Exec(self, feedback_fn):
3922 """Start the instance.
3925 instance = self.instance
3926 force = self.op.force
3928 self.cfg.MarkInstanceUp(instance.name)
3930 node_current = instance.primary_node
3932 _StartInstanceDisks(self, instance, force)
3934 result = self.rpc.call_instance_start(node_current, instance,
3935 self.hvparams, self.beparams)
3936     msg = result.fail_msg
3937     if msg:
3938       _ShutdownInstanceDisks(self, instance)
3939       raise errors.OpExecError("Could not start instance: %s" % msg)
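# A minimal sketch of driving this LU through the standard opcodes module
# (not imported here; the instance name is hypothetical and the parameter
# names mirror _OP_REQP above):
#
#   op = opcodes.OpStartupInstance(instance_name="inst1.example.com",
#                                  force=False)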
3942 class LURebootInstance(LogicalUnit):
3943 """Reboot an instance.
3946 HPATH = "instance-reboot"
3947 HTYPE = constants.HTYPE_INSTANCE
3948 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3951 def CheckArguments(self):
3952 """Check the arguments.
3955 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
3956 constants.DEFAULT_SHUTDOWN_TIMEOUT)
3958 def ExpandNames(self):
3959 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3960 constants.INSTANCE_REBOOT_HARD,
3961 constants.INSTANCE_REBOOT_FULL]:
3962 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3963 (constants.INSTANCE_REBOOT_SOFT,
3964 constants.INSTANCE_REBOOT_HARD,
3965 constants.INSTANCE_REBOOT_FULL))
3966 self._ExpandAndLockInstance()
3968 def BuildHooksEnv(self):
3971 This runs on master, primary and secondary nodes of the instance.
3975 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3976 "REBOOT_TYPE": self.op.reboot_type,
3977 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
3979 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3980     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3981     return env, nl, nl
3983 def CheckPrereq(self):
3984 """Check prerequisites.
3986 This checks that the instance is in the cluster.
3989 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3990 assert self.instance is not None, \
3991 "Cannot retrieve locked instance %s" % self.op.instance_name
3993 _CheckNodeOnline(self, instance.primary_node)
3995 # check bridges existence
3996 _CheckInstanceBridgesExist(self, instance)
3998 def Exec(self, feedback_fn):
3999 """Reboot the instance.
4002 instance = self.instance
4003 ignore_secondaries = self.op.ignore_secondaries
4004 reboot_type = self.op.reboot_type
4006 node_current = instance.primary_node
4008 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4009 constants.INSTANCE_REBOOT_HARD]:
4010 for disk in instance.disks:
4011 self.cfg.SetDiskID(disk, node_current)
4012       result = self.rpc.call_instance_reboot(node_current, instance,
4013                                              reboot_type,
4014                                              self.shutdown_timeout)
4015       result.Raise("Could not reboot instance")
4016     else:
4017       result = self.rpc.call_instance_shutdown(node_current, instance,
4018 self.shutdown_timeout)
4019 result.Raise("Could not shutdown instance for full reboot")
4020 _ShutdownInstanceDisks(self, instance)
4021 _StartInstanceDisks(self, instance, ignore_secondaries)
4022 result = self.rpc.call_instance_start(node_current, instance, None, None)
4023       msg = result.fail_msg
4024       if msg:
4025         _ShutdownInstanceDisks(self, instance)
4026         raise errors.OpExecError("Could not start instance for"
4027                                  " full reboot: %s" % msg)
4029 self.cfg.MarkInstanceUp(instance.name)
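# Opcode-level sketch for this LU (hypothetical instance name; the
# reboot_type must be one of the three INSTANCE_REBOOT_* constants
# validated in ExpandNames above):
#
#   op = opcodes.OpRebootInstance(instance_name="inst1.example.com",
#                                 reboot_type=constants.INSTANCE_REBOOT_SOFT,
#                                 ignore_secondaries=False)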
4032 class LUShutdownInstance(LogicalUnit):
4033 """Shutdown an instance.
4036 HPATH = "instance-stop"
4037 HTYPE = constants.HTYPE_INSTANCE
4038 _OP_REQP = ["instance_name"]
4041 def CheckArguments(self):
4042 """Check the arguments.
4045 self.timeout = getattr(self.op, "timeout",
4046 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4048 def ExpandNames(self):
4049 self._ExpandAndLockInstance()
4051 def BuildHooksEnv(self):
4054 This runs on master, primary and secondary nodes of the instance.
4057 env = _BuildInstanceHookEnvByObject(self, self.instance)
4058 env["TIMEOUT"] = self.timeout
4059     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4060     return env, nl, nl
4062 def CheckPrereq(self):
4063 """Check prerequisites.
4065 This checks that the instance is in the cluster.
4068 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4069 assert self.instance is not None, \
4070 "Cannot retrieve locked instance %s" % self.op.instance_name
4071 _CheckNodeOnline(self, self.instance.primary_node)
4073 def Exec(self, feedback_fn):
4074 """Shutdown the instance.
4077 instance = self.instance
4078 node_current = instance.primary_node
4079 timeout = self.timeout
4080 self.cfg.MarkInstanceDown(instance.name)
4081 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4082     msg = result.fail_msg
4083     if msg:
4084       self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4086 _ShutdownInstanceDisks(self, instance)
4089 class LUReinstallInstance(LogicalUnit):
4090 """Reinstall an instance.
4093 HPATH = "instance-reinstall"
4094 HTYPE = constants.HTYPE_INSTANCE
4095 _OP_REQP = ["instance_name"]
4098 def ExpandNames(self):
4099 self._ExpandAndLockInstance()
4101 def BuildHooksEnv(self):
4104 This runs on master, primary and secondary nodes of the instance.
4107 env = _BuildInstanceHookEnvByObject(self, self.instance)
4108     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4109     return env, nl, nl
4111 def CheckPrereq(self):
4112 """Check prerequisites.
4114 This checks that the instance is in the cluster and is not running.
4117 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4118 assert instance is not None, \
4119 "Cannot retrieve locked instance %s" % self.op.instance_name
4120 _CheckNodeOnline(self, instance.primary_node)
4122 if instance.disk_template == constants.DT_DISKLESS:
4123 raise errors.OpPrereqError("Instance '%s' has no disks" %
4124                                  self.op.instance_name,
4125                                  errors.ECODE_INVAL)
4126 if instance.admin_up:
4127 raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4128                                  self.op.instance_name,
4129                                  errors.ECODE_STATE)
4130     remote_info = self.rpc.call_instance_info(instance.primary_node,
4131                                               instance.name,
4132 instance.hypervisor)
4133 remote_info.Raise("Error checking node %s" % instance.primary_node,
4134 prereq=True, ecode=errors.ECODE_ENVIRON)
4135 if remote_info.payload:
4136 raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4137 (self.op.instance_name,
4138                                  instance.primary_node),
4139                                  errors.ECODE_STATE)
4141 self.op.os_type = getattr(self.op, "os_type", None)
4142 self.op.force_variant = getattr(self.op, "force_variant", False)
4143 if self.op.os_type is not None:
4145 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4146 result = self.rpc.call_os_get(pnode, self.op.os_type)
4147 result.Raise("OS '%s' not in supported OS list for primary node %s" %
4148 (self.op.os_type, pnode),
4149 prereq=True, ecode=errors.ECODE_INVAL)
4150 if not self.op.force_variant:
4151 _CheckOSVariant(result.payload, self.op.os_type)
4153 self.instance = instance
4155 def Exec(self, feedback_fn):
4156 """Reinstall the instance.
4159 inst = self.instance
4161 if self.op.os_type is not None:
4162 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4163 inst.os = self.op.os_type
4164 self.cfg.Update(inst, feedback_fn)
4166     _StartInstanceDisks(self, inst, None)
4167     try:
4168       feedback_fn("Running the instance OS create scripts...")
4169 # FIXME: pass debug option from opcode to backend
4170 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4171 self.op.debug_level)
4172 result.Raise("Could not install OS for instance %s on node %s" %
4173                    (inst.name, inst.primary_node))
4174     finally:
4175       _ShutdownInstanceDisks(self, inst)
4178 class LURecreateInstanceDisks(LogicalUnit):
4179 """Recreate an instance's missing disks.
4182 HPATH = "instance-recreate-disks"
4183 HTYPE = constants.HTYPE_INSTANCE
4184 _OP_REQP = ["instance_name", "disks"]
4187 def CheckArguments(self):
4188 """Check the arguments.
4191 if not isinstance(self.op.disks, list):
4192 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4193 for item in self.op.disks:
4194       if (not isinstance(item, int) or
4195           item < 0):
4196         raise errors.OpPrereqError("Invalid disk specification '%s'" %
4197                                    str(item), errors.ECODE_INVAL)
4199 def ExpandNames(self):
4200 self._ExpandAndLockInstance()
4202 def BuildHooksEnv(self):
4205 This runs on master, primary and secondary nodes of the instance.
4208 env = _BuildInstanceHookEnvByObject(self, self.instance)
4209     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4210     return env, nl, nl
4212 def CheckPrereq(self):
4213 """Check prerequisites.
4215 This checks that the instance is in the cluster and is not running.
4218 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4219 assert instance is not None, \
4220 "Cannot retrieve locked instance %s" % self.op.instance_name
4221 _CheckNodeOnline(self, instance.primary_node)
4223 if instance.disk_template == constants.DT_DISKLESS:
4224 raise errors.OpPrereqError("Instance '%s' has no disks" %
4225 self.op.instance_name, errors.ECODE_INVAL)
4226 if instance.admin_up:
4227 raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4228 self.op.instance_name, errors.ECODE_STATE)
4229     remote_info = self.rpc.call_instance_info(instance.primary_node,
4230                                               instance.name,
4231 instance.hypervisor)
4232 remote_info.Raise("Error checking node %s" % instance.primary_node,
4233 prereq=True, ecode=errors.ECODE_ENVIRON)
4234 if remote_info.payload:
4235 raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4236 (self.op.instance_name,
4237 instance.primary_node), errors.ECODE_STATE)
4239 if not self.op.disks:
4240 self.op.disks = range(len(instance.disks))
4242 for idx in self.op.disks:
4243 if idx >= len(instance.disks):
4244         raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4245                                    errors.ECODE_INVAL)
4247 self.instance = instance
4249 def Exec(self, feedback_fn):
4250 """Recreate the disks.
4253     to_skip = []
4254     for idx, _ in enumerate(self.instance.disks):
4255       if idx not in self.op.disks: # disk idx has not been passed in
4256         to_skip.append(idx)
4259 _CreateDisks(self, self.instance, to_skip=to_skip)
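# Worked example for the skip computation above: on an instance with
# three disks, self.op.disks == [0] yields to_skip == [1, 2], so only
# disk 0 is recreated by _CreateDisks.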
4262 class LURenameInstance(LogicalUnit):
4263 """Rename an instance.
4266 HPATH = "instance-rename"
4267 HTYPE = constants.HTYPE_INSTANCE
4268 _OP_REQP = ["instance_name", "new_name"]
4270 def BuildHooksEnv(self):
4273 This runs on master, primary and secondary nodes of the instance.
4276 env = _BuildInstanceHookEnvByObject(self, self.instance)
4277 env["INSTANCE_NEW_NAME"] = self.op.new_name
4278     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4279     return env, nl, nl
4281 def CheckPrereq(self):
4282 """Check prerequisites.
4284 This checks that the instance is in the cluster and is not running.
4287 self.op.instance_name = _ExpandInstanceName(self.cfg,
4288 self.op.instance_name)
4289 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4290 assert instance is not None
4291 _CheckNodeOnline(self, instance.primary_node)
4293 if instance.admin_up:
4294 raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4295 self.op.instance_name, errors.ECODE_STATE)
4296     remote_info = self.rpc.call_instance_info(instance.primary_node,
4297                                               instance.name,
4298 instance.hypervisor)
4299 remote_info.Raise("Error checking node %s" % instance.primary_node,
4300 prereq=True, ecode=errors.ECODE_ENVIRON)
4301 if remote_info.payload:
4302 raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4303 (self.op.instance_name,
4304 instance.primary_node), errors.ECODE_STATE)
4305 self.instance = instance
4307 # new name verification
4308 name_info = utils.GetHostInfo(self.op.new_name)
4310 self.op.new_name = new_name = name_info.name
4311 instance_list = self.cfg.GetInstanceList()
4312 if new_name in instance_list:
4313 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4314 new_name, errors.ECODE_EXISTS)
4316 if not getattr(self.op, "ignore_ip", False):
4317 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4318 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4319 (name_info.ip, new_name),
4320 errors.ECODE_NOTUNIQUE)
4323 def Exec(self, feedback_fn):
4324 """Reinstall the instance.
4327 inst = self.instance
4328 old_name = inst.name
4330 if inst.disk_template == constants.DT_FILE:
4331 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4333 self.cfg.RenameInstance(inst.name, self.op.new_name)
4334 # Change the instance lock. This is definitely safe while we hold the BGL
4335 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4336 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4338 # re-read the instance from the configuration after rename
4339 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4341 if inst.disk_template == constants.DT_FILE:
4342 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4343 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4344 old_file_storage_dir,
4345 new_file_storage_dir)
4346 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4347 " (but the instance has been renamed in Ganeti)" %
4348 (inst.primary_node, old_file_storage_dir,
4349 new_file_storage_dir))
4351     _StartInstanceDisks(self, inst, None)
4352     try:
4353       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4354 old_name, self.op.debug_level)
4355       msg = result.fail_msg
4356       if msg:
4357         msg = ("Could not run OS rename script for instance %s on node %s"
4358                " (but the instance has been renamed in Ganeti): %s" %
4359                (inst.name, inst.primary_node, msg))
4360         self.proc.LogWarning(msg)
4361     finally:
4362       _ShutdownInstanceDisks(self, inst)
4365 class LURemoveInstance(LogicalUnit):
4366 """Remove an instance.
4369 HPATH = "instance-remove"
4370 HTYPE = constants.HTYPE_INSTANCE
4371 _OP_REQP = ["instance_name", "ignore_failures"]
4374 def CheckArguments(self):
4375 """Check the arguments.
4378 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4379 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4381 def ExpandNames(self):
4382 self._ExpandAndLockInstance()
4383 self.needed_locks[locking.LEVEL_NODE] = []
4384 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4386 def DeclareLocks(self, level):
4387 if level == locking.LEVEL_NODE:
4388 self._LockInstancesNodes()
4390 def BuildHooksEnv(self):
4393 This runs on master, primary and secondary nodes of the instance.
4396 env = _BuildInstanceHookEnvByObject(self, self.instance)
4397 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4398 nl = [self.cfg.GetMasterNode()]
4399 nl_post = list(self.instance.all_nodes) + nl
4400 return env, nl, nl_post
4402 def CheckPrereq(self):
4403 """Check prerequisites.
4405 This checks that the instance is in the cluster.
4408 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4409 assert self.instance is not None, \
4410 "Cannot retrieve locked instance %s" % self.op.instance_name
4412 def Exec(self, feedback_fn):
4413 """Remove the instance.
4416 instance = self.instance
4417 logging.info("Shutting down instance %s on node %s",
4418 instance.name, instance.primary_node)
4420 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4421 self.shutdown_timeout)
4422     msg = result.fail_msg
4423     if msg:
4424       if self.op.ignore_failures:
4425         feedback_fn("Warning: can't shutdown instance: %s" % msg)
4426       else:
4427         raise errors.OpExecError("Could not shutdown instance %s on"
4428                                  " node %s: %s" %
4429                                  (instance.name, instance.primary_node, msg))
4431 logging.info("Removing block devices for instance %s", instance.name)
4433 if not _RemoveDisks(self, instance):
4434 if self.op.ignore_failures:
4435         feedback_fn("Warning: can't remove instance's disks")
4436       else:
4437         raise errors.OpExecError("Can't remove instance's disks")
4439 logging.info("Removing instance %s out of cluster config", instance.name)
4441 self.cfg.RemoveInstance(instance.name)
4442 self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4445 class LUQueryInstances(NoHooksLU):
4446 """Logical unit for querying instances.
4449 # pylint: disable-msg=W0142
4450 _OP_REQP = ["output_fields", "names", "use_locking"]
4452 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4453 "serial_no", "ctime", "mtime", "uuid"]
4454   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4455                                     "admin_state",
4456 "disk_template", "ip", "mac", "bridge",
4457 "nic_mode", "nic_link",
4458 "sda_size", "sdb_size", "vcpus", "tags",
4459 "network_port", "beparams",
4460 r"(disk)\.(size)/([0-9]+)",
4461 r"(disk)\.(sizes)", "disk_usage",
4462 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4463 r"(nic)\.(bridge)/([0-9]+)",
4464 r"(nic)\.(macs|ips|modes|links|bridges)",
4465 r"(disk|nic)\.(count)",
4467                                     ] + _SIMPLE_FIELDS +
4468                                    ["hv/%s" % name
4469                                     for name in constants.HVS_PARAMETERS
4470                                     if name not in constants.HVC_GLOBALS] +
4471                                    ["be/%s" % name
4472                                     for name in constants.BES_PARAMETERS])
4473 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4476 def ExpandNames(self):
4477 _CheckOutputFields(static=self._FIELDS_STATIC,
4478 dynamic=self._FIELDS_DYNAMIC,
4479 selected=self.op.output_fields)
4481 self.needed_locks = {}
4482 self.share_locks[locking.LEVEL_INSTANCE] = 1
4483 self.share_locks[locking.LEVEL_NODE] = 1
4485     if self.op.names:
4486       self.wanted = _GetWantedInstances(self, self.op.names)
4487     else:
4488       self.wanted = locking.ALL_SET
4490 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4491 self.do_locking = self.do_node_query and self.op.use_locking
4492     if self.do_locking:
4493       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4494 self.needed_locks[locking.LEVEL_NODE] = []
4495 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4497 def DeclareLocks(self, level):
4498 if level == locking.LEVEL_NODE and self.do_locking:
4499 self._LockInstancesNodes()
4501 def CheckPrereq(self):
4502 """Check prerequisites.
4507 def Exec(self, feedback_fn):
4508 """Computes the list of nodes and their attributes.
4511 # pylint: disable-msg=R0912
4512 # way too many branches here
4513 all_info = self.cfg.GetAllInstancesInfo()
4514     if self.wanted == locking.ALL_SET:
4515       # caller didn't specify instance names, so ordering is not important
4516       if self.do_locking:
4517         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4518       else:
4519         instance_names = all_info.keys()
4520       instance_names = utils.NiceSort(instance_names)
4521     else:
4522       # caller did specify names, so we must keep the ordering
4523       if self.do_locking:
4524         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4525       else:
4526         tgt_set = all_info.keys()
4527       missing = set(self.wanted).difference(tgt_set)
4528       if missing:
4529         raise errors.OpExecError("Some instances were removed before"
4530                                  " retrieving their data: %s" % missing)
4531       instance_names = self.wanted
4533 instance_list = [all_info[iname] for iname in instance_names]
4535 # begin data gathering
4537 nodes = frozenset([inst.primary_node for inst in instance_list])
4538     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4540     bad_nodes = []
4541     off_nodes = []
4542     if self.do_node_query:
4543       live_data = {}
4544       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4545       for name in nodes:
4546         result = node_data[name]
4547         if result.offline:
4548           # offline nodes will be in both lists
4549           off_nodes.append(name)
4550         if result.fail_msg:
4551           bad_nodes.append(name)
4552         else:
4553           if result.payload:
4554             live_data.update(result.payload)
4555           # else no instance is alive
4556     else:
4557       live_data = dict([(name, {}) for name in instance_names])
4559     # end data gathering
4561     HVPREFIX = "hv/"
4562     BEPREFIX = "be/"
4563     output = []
4564     cluster = self.cfg.GetClusterInfo()
4565     for instance in instance_list:
4566       iout = []
4567 i_hv = cluster.FillHV(instance, skip_globals=True)
4568 i_be = cluster.FillBE(instance)
4569 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4570 nic.nicparams) for nic in instance.nics]
4571 for field in self.op.output_fields:
4572 st_match = self._FIELDS_STATIC.Matches(field)
4573 if field in self._SIMPLE_FIELDS:
4574 val = getattr(instance, field)
4575 elif field == "pnode":
4576 val = instance.primary_node
4577 elif field == "snodes":
4578 val = list(instance.secondary_nodes)
4579 elif field == "admin_state":
4580 val = instance.admin_up
4581         elif field == "oper_state":
4582           if instance.primary_node in bad_nodes:
4583             val = None
4584           else:
4585             val = bool(live_data.get(instance.name))
4586         elif field == "status":
4587           if instance.primary_node in off_nodes:
4588             val = "ERROR_nodeoffline"
4589           elif instance.primary_node in bad_nodes:
4590             val = "ERROR_nodedown"
4591           else:
4592             running = bool(live_data.get(instance.name))
4593             if running:
4594               if instance.admin_up:
4595                 val = "running"
4596               else:
4597                 val = "ERROR_up"
4598             else:
4599               if instance.admin_up:
4600                 val = "ERROR_down"
4601               else:
4602                 val = "ADMIN_down"
4603         elif field == "oper_ram":
4604           if instance.primary_node in bad_nodes:
4605             val = None
4606           elif instance.name in live_data:
4607             val = live_data[instance.name].get("memory", "?")
4608           else:
4609             val = "-"
4610 elif field == "vcpus":
4611 val = i_be[constants.BE_VCPUS]
4612 elif field == "disk_template":
4613           val = instance.disk_template
4614         elif field == "ip":
4615           if instance.nics:
4616             val = instance.nics[0].ip
4617           else:
4618             val = None
4619         elif field == "nic_mode":
4620           if instance.nics:
4621             val = i_nicp[0][constants.NIC_MODE]
4622           else:
4623             val = None
4624         elif field == "nic_link":
4625           if instance.nics:
4626             val = i_nicp[0][constants.NIC_LINK]
4627           else:
4628             val = None
4629         elif field == "bridge":
4630           if (instance.nics and
4631               i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4632             val = i_nicp[0][constants.NIC_LINK]
4633           else:
4634             val = None
4635         elif field == "mac":
4636           if instance.nics:
4637             val = instance.nics[0].mac
4638           else:
4639             val = None
4640         elif field == "sda_size" or field == "sdb_size":
4641           idx = ord(field[2]) - ord('a')
4642           try:
4643             val = instance.FindDisk(idx).size
4644           except errors.OpPrereqError:
4645             val = None
4646 elif field == "disk_usage": # total disk usage per node
4647 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4648 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4649 elif field == "tags":
4650 val = list(instance.GetTags())
4651         elif field == "hvparams":
4652           val = i_hv
4653 elif (field.startswith(HVPREFIX) and
4654 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4655 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4656 val = i_hv.get(field[len(HVPREFIX):], None)
4657         elif field == "beparams":
4658           val = i_be
4659 elif (field.startswith(BEPREFIX) and
4660 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4661 val = i_be.get(field[len(BEPREFIX):], None)
4662 elif st_match and st_match.groups():
4663 # matches a variable list
4664 st_groups = st_match.groups()
4665 if st_groups and st_groups[0] == "disk":
4666 if st_groups[1] == "count":
4667 val = len(instance.disks)
4668 elif st_groups[1] == "sizes":
4669 val = [disk.size for disk in instance.disks]
4670           elif st_groups[1] == "size":
4671             try:
4672               val = instance.FindDisk(st_groups[2]).size
4673             except errors.OpPrereqError:
4674               val = None
4675           else:
4676             assert False, "Unhandled disk parameter"
4677 elif st_groups[0] == "nic":
4678 if st_groups[1] == "count":
4679 val = len(instance.nics)
4680 elif st_groups[1] == "macs":
4681 val = [nic.mac for nic in instance.nics]
4682 elif st_groups[1] == "ips":
4683 val = [nic.ip for nic in instance.nics]
4684 elif st_groups[1] == "modes":
4685 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4686 elif st_groups[1] == "links":
4687 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4688             elif st_groups[1] == "bridges":
4689               val = []
4690               for nicp in i_nicp:
4691                 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4692                   val.append(nicp[constants.NIC_LINK])
4693                 else:
4694                   val.append(None)
4695             else:
4696               # index-based items
4697               nic_idx = int(st_groups[2])
4698               if nic_idx >= len(instance.nics):
4699                 val = None
4700               else:
4701                 if st_groups[1] == "mac":
4702 val = instance.nics[nic_idx].mac
4703 elif st_groups[1] == "ip":
4704 val = instance.nics[nic_idx].ip
4705 elif st_groups[1] == "mode":
4706 val = i_nicp[nic_idx][constants.NIC_MODE]
4707 elif st_groups[1] == "link":
4708 val = i_nicp[nic_idx][constants.NIC_LINK]
4709                 elif st_groups[1] == "bridge":
4710                   nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4711                   if nic_mode == constants.NIC_MODE_BRIDGED:
4712                     val = i_nicp[nic_idx][constants.NIC_LINK]
4713                   else:
4714                     val = None
4715                 else:
4716                   assert False, "Unhandled NIC parameter"
4717           else:
4718             assert False, ("Declared but unhandled variable parameter '%s'" %
4719                            field)
4720         else:
4721           assert False, "Declared but unhandled parameter '%s'" % field
4722         iout.append(val)
4723       output.append(iout)
4725     return output
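# Example of the result shape (hypothetical data): with output_fields
# ["name", "oper_ram"], two instances yield one row each, one value per
# requested field:
#
#   [["inst1.example.com", 512], ["inst2.example.com", "-"]]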
4728 class LUFailoverInstance(LogicalUnit):
4729 """Failover an instance.
4732 HPATH = "instance-failover"
4733 HTYPE = constants.HTYPE_INSTANCE
4734 _OP_REQP = ["instance_name", "ignore_consistency"]
4737 def CheckArguments(self):
4738 """Check the arguments.
4741 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4742 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4744 def ExpandNames(self):
4745 self._ExpandAndLockInstance()
4746 self.needed_locks[locking.LEVEL_NODE] = []
4747 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4749 def DeclareLocks(self, level):
4750 if level == locking.LEVEL_NODE:
4751 self._LockInstancesNodes()
4753 def BuildHooksEnv(self):
4756 This runs on master, primary and secondary nodes of the instance.
4759 instance = self.instance
4760 source_node = instance.primary_node
4761 target_node = instance.secondary_nodes[0]
4763 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4764 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4765 "OLD_PRIMARY": source_node,
4766 "OLD_SECONDARY": target_node,
4767 "NEW_PRIMARY": target_node,
4768 "NEW_SECONDARY": source_node,
4770 env.update(_BuildInstanceHookEnvByObject(self, instance))
4771     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4772     nl_post = list(nl)
4773     nl_post.append(source_node)
4774 return env, nl, nl_post
4776 def CheckPrereq(self):
4777 """Check prerequisites.
4779 This checks that the instance is in the cluster.
4782 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4783 assert self.instance is not None, \
4784 "Cannot retrieve locked instance %s" % self.op.instance_name
4786 bep = self.cfg.GetClusterInfo().FillBE(instance)
4787 if instance.disk_template not in constants.DTS_NET_MIRROR:
4788 raise errors.OpPrereqError("Instance's disk layout is not"
4789 " network mirrored, cannot failover.",
4792 secondary_nodes = instance.secondary_nodes
4793 if not secondary_nodes:
4794 raise errors.ProgrammerError("no secondary node but using "
4795 "a mirrored disk template")
4797 target_node = secondary_nodes[0]
4798 _CheckNodeOnline(self, target_node)
4799 _CheckNodeNotDrained(self, target_node)
4800 if instance.admin_up:
4801 # check memory requirements on the secondary node
4802 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4803 instance.name, bep[constants.BE_MEMORY],
4804                            instance.hypervisor)
4805     else:
4806       self.LogInfo("Not checking memory on the secondary node as"
4807                    " instance will not be started")
4809     # check bridge existence
4810 _CheckInstanceBridgesExist(self, instance, node=target_node)
4812 def Exec(self, feedback_fn):
4813 """Failover an instance.
4815 The failover is done by shutting it down on its present node and
4816 starting it on the secondary.
4819 instance = self.instance
4821 source_node = instance.primary_node
4822 target_node = instance.secondary_nodes[0]
4824 if instance.admin_up:
4825 feedback_fn("* checking disk consistency between source and target")
4826 for dev in instance.disks:
4827 # for drbd, these are drbd over lvm
4828 if not _CheckDiskConsistency(self, dev, target_node, False):
4829 if not self.op.ignore_consistency:
4830 raise errors.OpExecError("Disk %s is degraded on target node,"
4831 " aborting failover." % dev.iv_name)
4833 feedback_fn("* not checking disk consistency as instance is not running")
4835 feedback_fn("* shutting down instance on source node")
4836 logging.info("Shutting down instance %s on node %s",
4837 instance.name, source_node)
4839 result = self.rpc.call_instance_shutdown(source_node, instance,
4840 self.shutdown_timeout)
4841     msg = result.fail_msg
4842     if msg:
4843       if self.op.ignore_consistency:
4844 self.proc.LogWarning("Could not shutdown instance %s on node %s."
4845 " Proceeding anyway. Please make sure node"
4846 " %s is down. Error details: %s",
4847 instance.name, source_node, source_node, msg)
4849         raise errors.OpExecError("Could not shutdown instance %s on"
4850                                  " node %s: %s" %
4851                                  (instance.name, source_node, msg))
4853 feedback_fn("* deactivating the instance's disks on source node")
4854 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4855 raise errors.OpExecError("Can't shut down the instance's disks.")
4857 instance.primary_node = target_node
4858 # distribute new instance config to the other nodes
4859 self.cfg.Update(instance, feedback_fn)
4861 # Only start the instance if it's marked as up
4862 if instance.admin_up:
4863 feedback_fn("* activating the instance's disks on target node")
4864 logging.info("Starting instance %s on node %s",
4865 instance.name, target_node)
4867 disks_ok, _ = _AssembleInstanceDisks(self, instance,
4868                                            ignore_secondaries=True)
4869       if not disks_ok:
4870         _ShutdownInstanceDisks(self, instance)
4871         raise errors.OpExecError("Can't activate the instance's disks")
4873 feedback_fn("* starting the instance on the target node")
4874 result = self.rpc.call_instance_start(target_node, instance, None, None)
4875       msg = result.fail_msg
4876       if msg:
4877         _ShutdownInstanceDisks(self, instance)
4878 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4879 (instance.name, target_node, msg))
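# Opcode-level sketch for a failover (hypothetical instance name;
# ignore_consistency bypasses the disk consistency check above):
#
#   op = opcodes.OpFailoverInstance(instance_name="inst1.example.com",
#                                   ignore_consistency=False)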
4882 class LUMigrateInstance(LogicalUnit):
4883 """Migrate an instance.
4885   This is migration without shutting down (live migration), as opposed
4886   to failover, which requires the instance to be shut down.
4889 HPATH = "instance-migrate"
4890 HTYPE = constants.HTYPE_INSTANCE
4891 _OP_REQP = ["instance_name", "live", "cleanup"]
4895 def ExpandNames(self):
4896 self._ExpandAndLockInstance()
4898 self.needed_locks[locking.LEVEL_NODE] = []
4899 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4901 self._migrater = TLMigrateInstance(self, self.op.instance_name,
4902 self.op.live, self.op.cleanup)
4903 self.tasklets = [self._migrater]
4905 def DeclareLocks(self, level):
4906 if level == locking.LEVEL_NODE:
4907 self._LockInstancesNodes()
4909 def BuildHooksEnv(self):
4912 This runs on master, primary and secondary nodes of the instance.
4915 instance = self._migrater.instance
4916 source_node = instance.primary_node
4917 target_node = instance.secondary_nodes[0]
4918 env = _BuildInstanceHookEnvByObject(self, instance)
4919 env["MIGRATE_LIVE"] = self.op.live
4920     env["MIGRATE_CLEANUP"] = self.op.cleanup
4921     env.update({
4922         "OLD_PRIMARY": source_node,
4923         "OLD_SECONDARY": target_node,
4924         "NEW_PRIMARY": target_node,
4925         "NEW_SECONDARY": source_node,
4926         })
4927     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4928     nl_post = list(nl)
4929     nl_post.append(source_node)
4930 return env, nl, nl_post
4933 class LUMoveInstance(LogicalUnit):
4934 """Move an instance by data-copying.
4937 HPATH = "instance-move"
4938 HTYPE = constants.HTYPE_INSTANCE
4939 _OP_REQP = ["instance_name", "target_node"]
4942 def CheckArguments(self):
4943 """Check the arguments.
4946 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4947 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4949 def ExpandNames(self):
4950 self._ExpandAndLockInstance()
4951 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
4952 self.op.target_node = target_node
4953 self.needed_locks[locking.LEVEL_NODE] = [target_node]
4954 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4956 def DeclareLocks(self, level):
4957 if level == locking.LEVEL_NODE:
4958 self._LockInstancesNodes(primary_only=True)
4960 def BuildHooksEnv(self):
4963 This runs on master, primary and secondary nodes of the instance.
4967 "TARGET_NODE": self.op.target_node,
4968 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4970 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4971 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4972                                        self.op.target_node]
4973     return env, nl, nl
4975 def CheckPrereq(self):
4976 """Check prerequisites.
4978 This checks that the instance is in the cluster.
4981 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4982 assert self.instance is not None, \
4983 "Cannot retrieve locked instance %s" % self.op.instance_name
4985 node = self.cfg.GetNodeInfo(self.op.target_node)
4986 assert node is not None, \
4987 "Cannot retrieve locked node %s" % self.op.target_node
4989 self.target_node = target_node = node.name
4991 if target_node == instance.primary_node:
4992 raise errors.OpPrereqError("Instance %s is already on the node %s" %
4993                                  (instance.name, target_node),
4994                                  errors.ECODE_STATE)
4996 bep = self.cfg.GetClusterInfo().FillBE(instance)
4998 for idx, dsk in enumerate(instance.disks):
4999 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5000 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5001 " cannot copy" % idx, errors.ECODE_STATE)
5003 _CheckNodeOnline(self, target_node)
5004 _CheckNodeNotDrained(self, target_node)
5006 if instance.admin_up:
5007       # check memory requirements on the target node
5008       _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
5009                            instance.name, bep[constants.BE_MEMORY],
5010                            instance.hypervisor)
5011     else:
5012       self.LogInfo("Not checking memory on the target node as"
5013                    " instance will not be started")
5015     # check bridge existence
5016 _CheckInstanceBridgesExist(self, instance, node=target_node)
5018 def Exec(self, feedback_fn):
5019 """Move an instance.
5021 The move is done by shutting it down on its present node, copying
5022 the data over (slow) and starting it on the new node.
5025 instance = self.instance
5027 source_node = instance.primary_node
5028 target_node = self.target_node
5030 self.LogInfo("Shutting down instance %s on source node %s",
5031 instance.name, source_node)
5033 result = self.rpc.call_instance_shutdown(source_node, instance,
5034 self.shutdown_timeout)
5035     msg = result.fail_msg
5036     if msg:
5037       # ignore_consistency is not a declared parameter of this opcode
5038       if getattr(self.op, "ignore_consistency", False):
5038 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5039 " Proceeding anyway. Please make sure node"
5040 " %s is down. Error details: %s",
5041 instance.name, source_node, source_node, msg)
5043         raise errors.OpExecError("Could not shutdown instance %s on"
5044                                  " node %s: %s" %
5045                                  (instance.name, source_node, msg))
5047     # create the target disks
5048     try:
5049       _CreateDisks(self, instance, target_node=target_node)
5050     except errors.OpExecError:
5051       self.LogWarning("Device creation failed, reverting...")
5052       try:
5053         _RemoveDisks(self, instance, target_node=target_node)
5054       finally:
5055         self.cfg.ReleaseDRBDMinors(instance.name)
5056         raise
5058     cluster_name = self.cfg.GetClusterInfo().cluster_name
5060     errs = []
5061     # activate, get path, copy the data over
5062 for idx, disk in enumerate(instance.disks):
5063 self.LogInfo("Copying data for disk %d", idx)
5064 result = self.rpc.call_blockdev_assemble(target_node, disk,
5065                                                instance.name, True)
5066       if result.fail_msg:
5067         self.LogWarning("Can't assemble newly created disk %d: %s",
5068                         idx, result.fail_msg)
5069         errs.append(result.fail_msg)
5070         break
5071       dev_path = result.payload
5072       result = self.rpc.call_blockdev_export(source_node, disk,
5073                                              target_node, dev_path,
5074                                              cluster_name)
5075       if result.fail_msg:
5076         self.LogWarning("Can't copy data over for disk %d: %s",
5077                         idx, result.fail_msg)
5078         errs.append(result.fail_msg)
5079         break
5081     if errs:
5082       self.LogWarning("Some disks failed to copy, aborting")
5083       try:
5084         _RemoveDisks(self, instance, target_node=target_node)
5085       finally:
5086         self.cfg.ReleaseDRBDMinors(instance.name)
5087         raise errors.OpExecError("Errors during disk copy: %s" %
5088                                  (",".join(errs),))
5090 instance.primary_node = target_node
5091 self.cfg.Update(instance, feedback_fn)
5093 self.LogInfo("Removing the disks on the original node")
5094 _RemoveDisks(self, instance, target_node=source_node)
5096 # Only start the instance if it's marked as up
5097 if instance.admin_up:
5098 self.LogInfo("Starting instance %s on node %s",
5099 instance.name, target_node)
5101 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5102                                            ignore_secondaries=True)
5103       if not disks_ok:
5104         _ShutdownInstanceDisks(self, instance)
5105         raise errors.OpExecError("Can't activate the instance's disks")
5107 result = self.rpc.call_instance_start(target_node, instance, None, None)
5108       msg = result.fail_msg
5109       if msg:
5110         _ShutdownInstanceDisks(self, instance)
5111 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5112 (instance.name, target_node, msg))
5115 class LUMigrateNode(LogicalUnit):
5116 """Migrate all instances from a node.
5119 HPATH = "node-migrate"
5120 HTYPE = constants.HTYPE_NODE
5121 _OP_REQP = ["node_name", "live"]
5124 def ExpandNames(self):
5125 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5127 self.needed_locks = {
5128 locking.LEVEL_NODE: [self.op.node_name],
5131 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5133     # Create tasklets for migrating instances for all instances on this node
5134     names = []
5135     tasklets = []
5137 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5138 logging.debug("Migrating instance %s", inst.name)
5139 names.append(inst.name)
5141 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5143 self.tasklets = tasklets
5145 # Declare instance locks
5146 self.needed_locks[locking.LEVEL_INSTANCE] = names
5148 def DeclareLocks(self, level):
5149 if level == locking.LEVEL_NODE:
5150 self._LockInstancesNodes()
5152 def BuildHooksEnv(self):
5155 This runs on the master, the primary and all the secondaries.
5159 "NODE_NAME": self.op.node_name,
5162 nl = [self.cfg.GetMasterNode()]
5164 return (env, nl, nl)
5167 class TLMigrateInstance(Tasklet):
5168 def __init__(self, lu, instance_name, live, cleanup):
5169 """Initializes this class.
5172 Tasklet.__init__(self, lu)
5175     self.instance_name = instance_name
5176     self.live = live
5177     self.cleanup = cleanup
5179 def CheckPrereq(self):
5180 """Check prerequisites.
5182 This checks that the instance is in the cluster.
5185 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5186 instance = self.cfg.GetInstanceInfo(instance_name)
5187 assert instance is not None
5189 if instance.disk_template != constants.DT_DRBD8:
5190 raise errors.OpPrereqError("Instance's disk layout is not"
5191 " drbd8, cannot migrate.", errors.ECODE_STATE)
5193 secondary_nodes = instance.secondary_nodes
5194 if not secondary_nodes:
5195 raise errors.ConfigurationError("No secondary node but using"
5196 " drbd8 disk template")
5198 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5200 target_node = secondary_nodes[0]
5201 # check memory requirements on the secondary node
5202 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5203 instance.name, i_be[constants.BE_MEMORY],
5204 instance.hypervisor)
5206 # check bridge existance
5207 _CheckInstanceBridgesExist(self, instance, node=target_node)
5209 if not self.cleanup:
5210 _CheckNodeNotDrained(self, target_node)
5211       result = self.rpc.call_instance_migratable(instance.primary_node,
5212                                                  instance)
5213 result.Raise("Can't migrate, please use failover",
5214 prereq=True, ecode=errors.ECODE_STATE)
5216 self.instance = instance
5218 def _WaitUntilSync(self):
5219 """Poll with custom rpc for disk sync.
5221 This uses our own step-based rpc call.
5224     self.feedback_fn("* wait until resync is done")
5225     all_done = False
5226     while not all_done:
5227       all_done = True
5228       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5229                                             self.nodes_ip,
5230                                             self.instance.disks)
5231       min_percent = 100
5232       for node, nres in result.items():
5233         nres.Raise("Cannot resync disks on node %s" % node)
5234         node_done, node_percent = nres.payload
5235         all_done = all_done and node_done
5236         if node_percent is not None:
5237           min_percent = min(min_percent, node_percent)
5238       if not all_done:
5239         if min_percent < 100:
5240           self.feedback_fn("   - progress: %.1f%%" % min_percent)
5241         time.sleep(2)
5243 def _EnsureSecondary(self, node):
5244 """Demote a node to secondary.
5247 self.feedback_fn("* switching node %s to secondary mode" % node)
5249 for dev in self.instance.disks:
5250 self.cfg.SetDiskID(dev, node)
5252 result = self.rpc.call_blockdev_close(node, self.instance.name,
5253 self.instance.disks)
5254 result.Raise("Cannot change disk to secondary on node %s" % node)
5256 def _GoStandalone(self):
5257 """Disconnect from the network.
5260 self.feedback_fn("* changing into standalone mode")
5261 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5262 self.instance.disks)
5263 for node, nres in result.items():
5264 nres.Raise("Cannot disconnect disks node %s" % node)
5266 def _GoReconnect(self, multimaster):
5267 """Reconnect to the network.
5273 msg = "single-master"
5274 self.feedback_fn("* changing disks into %s mode" % msg)
5275 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5276 self.instance.disks,
5277 self.instance.name, multimaster)
5278 for node, nres in result.items():
5279 nres.Raise("Cannot change disks config on node %s" % node)
5281 def _ExecCleanup(self):
5282 """Try to cleanup after a failed migration.
5284 The cleanup is done by:
5285 - check that the instance is running only on one node
5286 (and update the config if needed)
5287 - change disks on its secondary node to secondary
5288 - wait until disks are fully synchronized
5289 - disconnect from the network
5290 - change disks into single-master mode
5291 - wait again until disks are fully synchronized
5294 instance = self.instance
5295 target_node = self.target_node
5296 source_node = self.source_node
5298 # check running on only one node
5299     self.feedback_fn("* checking where the instance actually runs"
5300                      " (if this hangs, the hypervisor might be in"
5301                      " a bad state)")
5302 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5303 for node, result in ins_l.items():
5304 result.Raise("Can't contact node %s" % node)
5306 runningon_source = instance.name in ins_l[source_node].payload
5307 runningon_target = instance.name in ins_l[target_node].payload
5309 if runningon_source and runningon_target:
5310 raise errors.OpExecError("Instance seems to be running on two nodes,"
5311 " or the hypervisor is confused. You will have"
5312 " to ensure manually that it runs only on one"
5313 " and restart this operation.")
5315 if not (runningon_source or runningon_target):
5316 raise errors.OpExecError("Instance does not seem to be running at all."
5317 " In this case, it's safer to repair by"
5318 " running 'gnt-instance stop' to ensure disk"
5319 " shutdown, and then restarting it.")
5321 if runningon_target:
5322 # the migration has actually succeeded, we need to update the config
5323 self.feedback_fn("* instance running on secondary node (%s),"
5324 " updating config" % target_node)
5325 instance.primary_node = target_node
5326 self.cfg.Update(instance, self.feedback_fn)
5327       demoted_node = source_node
5328     else:
5329       self.feedback_fn("* instance confirmed to be running on its"
5330 " primary node (%s)" % source_node)
5331 demoted_node = target_node
5333     self._EnsureSecondary(demoted_node)
5334     try:
5335       self._WaitUntilSync()
5336     except errors.OpExecError:
5337       # we ignore errors here, since if the device is standalone, it
5338       # won't be able to sync
5339       pass
5340     self._GoStandalone()
5341 self._GoReconnect(False)
5342 self._WaitUntilSync()
5344 self.feedback_fn("* done")
5346 def _RevertDiskStatus(self):
5347 """Try to revert the disk status after a failed migration.
5350     target_node = self.target_node
5351     try:
5352       self._EnsureSecondary(target_node)
5353 self._GoStandalone()
5354 self._GoReconnect(False)
5355 self._WaitUntilSync()
5356 except errors.OpExecError, err:
5357 self.lu.LogWarning("Migration failed and I can't reconnect the"
5358 " drives: error '%s'\n"
5359 "Please look and recover the instance status" %
5362 def _AbortMigration(self):
5363 """Call the hypervisor code to abort a started migration.
5366 instance = self.instance
5367 target_node = self.target_node
5368 migration_info = self.migration_info
5370     abort_result = self.rpc.call_finalize_migration(target_node,
5371                                                     instance,
5372                                                     migration_info,
5373                                                     False)
5374     abort_msg = abort_result.fail_msg
5375     if abort_msg:
5376       logging.error("Aborting migration failed on target node %s: %s",
5377 target_node, abort_msg)
5378     # Don't raise an exception here, as we still have to try to revert the
5379     # disk status, even if this step failed.
5381 def _ExecMigration(self):
5382 """Migrate an instance.
5384 The migrate is done by:
5385 - change the disks into dual-master mode
5386 - wait until disks are fully synchronized again
5387 - migrate the instance
5388 - change disks on the new secondary node (the old primary) to secondary
5389 - wait until disks are fully synchronized
5390 - change disks into single-master mode
5393 instance = self.instance
5394 target_node = self.target_node
5395 source_node = self.source_node
5397 self.feedback_fn("* checking disk consistency between source and target")
5398 for dev in instance.disks:
5399 if not _CheckDiskConsistency(self, dev, target_node, False):
5400 raise errors.OpExecError("Disk %s is degraded or not fully"
5401 " synchronized on target node,"
5402 " aborting migrate." % dev.iv_name)
5404 # First get the migration information from the remote node
5405 result = self.rpc.call_migration_info(source_node, instance)
5406     msg = result.fail_msg
5407     if msg:
5408       log_err = ("Failed fetching source migration information from %s: %s" %
5409                  (source_node, msg))
5410       logging.error(log_err)
5411       raise errors.OpExecError(log_err)
5413 self.migration_info = migration_info = result.payload
5415 # Then switch the disks to master/master mode
5416 self._EnsureSecondary(target_node)
5417 self._GoStandalone()
5418 self._GoReconnect(True)
5419 self._WaitUntilSync()
5421 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5422     result = self.rpc.call_accept_instance(target_node,
5423                                            instance,
5424                                            migration_info,
5425                                            self.nodes_ip[target_node])
5427     msg = result.fail_msg
5428     if msg:
5429       logging.error("Instance pre-migration failed, trying to revert"
5430 " disk status: %s", msg)
5431 self.feedback_fn("Pre-migration failed, aborting")
5432 self._AbortMigration()
5433 self._RevertDiskStatus()
5434 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5435 (instance.name, msg))
5437 self.feedback_fn("* migrating instance to %s" % target_node)
5439 result = self.rpc.call_instance_migrate(source_node, instance,
5440                                             self.nodes_ip[target_node],
5441                                             self.live)
5442     msg = result.fail_msg
5443     if msg:
5444       logging.error("Instance migration failed, trying to revert"
5445 " disk status: %s", msg)
5446 self.feedback_fn("Migration failed, aborting")
5447 self._AbortMigration()
5448 self._RevertDiskStatus()
5449 raise errors.OpExecError("Could not migrate instance %s: %s" %
5450 (instance.name, msg))
5453 instance.primary_node = target_node
5454 # distribute new instance config to the other nodes
5455 self.cfg.Update(instance, self.feedback_fn)
5457     result = self.rpc.call_finalize_migration(target_node,
5458                                               instance,
5459                                               migration_info,
5460                                               True)
5461     msg = result.fail_msg
5462     if msg:
5463       logging.error("Instance migration succeeded, but finalization failed:"
5464                     " %s", msg)
5465       raise errors.OpExecError("Could not finalize instance migration: %s" %
5466                                msg)
5468 self._EnsureSecondary(source_node)
5469 self._WaitUntilSync()
5470 self._GoStandalone()
5471 self._GoReconnect(False)
5472 self._WaitUntilSync()
5474 self.feedback_fn("* done")
5476 def Exec(self, feedback_fn):
5477 """Perform the migration.
5480 feedback_fn("Migrating instance %s" % self.instance.name)
5482 self.feedback_fn = feedback_fn
5484 self.source_node = self.instance.primary_node
5485 self.target_node = self.instance.secondary_nodes[0]
5486     self.all_nodes = [self.source_node, self.target_node]
5487     self.nodes_ip = {
5488       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5489       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5490       }
5492     if self.cleanup:
5493       return self._ExecCleanup()
5494     else:
5495       return self._ExecMigration()
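# How this tasklet is wired up by its logical units (mirrors the code in
# LUMigrateInstance above; LUMigrateNode builds one tasklet per primary
# instance of the node):
#
#   self._migrater = TLMigrateInstance(self, self.op.instance_name,
#                                      self.op.live, self.op.cleanup)
#   self.tasklets = [self._migrater]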
5498 def _CreateBlockDev(lu, node, instance, device, force_create,
5499                     info, force_open):
5500 """Create a tree of block devices on a given node.
5502   If this device type has to be created on secondaries, create it and
5503   all of its children.
5505   If not, just recurse to children keeping the same 'force' value.
5507 @param lu: the lu on whose behalf we execute
5508 @param node: the node on which to create the device
5509 @type instance: L{objects.Instance}
5510 @param instance: the instance which owns the device
5511 @type device: L{objects.Disk}
5512 @param device: the device to create
5513 @type force_create: boolean
5514   @param force_create: whether to force creation of this device; this
5515     will be changed to True whenever we find a device which has the
5516     CreateOnSecondary() attribute set
5517 @param info: the extra 'metadata' we should attach to the device
5518 (this will be represented as a LVM tag)
5519 @type force_open: boolean
5520   @param force_open: this parameter will be passed to the
5521     L{backend.BlockdevCreate} function where it specifies
5522     whether we run on primary or not, and it affects both
5523     the child assembly and the device's own Open() execution
5526   if device.CreateOnSecondary():
5527     force_create = True
5529   if device.children:
5530     for child in device.children:
5531       _CreateBlockDev(lu, node, instance, child, force_create,
5532                       info, force_open)
5534   if not force_create:
5535     return
5537   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5540 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5541 """Create a single block device on a given node.
5543   This will not recurse over children of the device, so they must be
5544   created in advance.
5546 @param lu: the lu on whose behalf we execute
5547 @param node: the node on which to create the device
5548 @type instance: L{objects.Instance}
5549 @param instance: the instance which owns the device
5550 @type device: L{objects.Disk}
5551 @param device: the device to create
5552 @param info: the extra 'metadata' we should attach to the device
5553 (this will be represented as a LVM tag)
5554 @type force_open: boolean
5555   @param force_open: this parameter will be passed to the
5556     L{backend.BlockdevCreate} function where it specifies
5557     whether we run on primary or not, and it affects both
5558     the child assembly and the device's own Open() execution
5561 lu.cfg.SetDiskID(device, node)
5562 result = lu.rpc.call_blockdev_create(node, device, device.size,
5563 instance.name, force_open, info)
5564 result.Raise("Can't create block device %s on"
5565 " node %s for instance %s" % (device, node, instance.name))
5566 if device.physical_id is None:
5567 device.physical_id = result.payload
5570 def _GenerateUniqueNames(lu, exts):
5571 """Generate a suitable LV name.
5573   This will generate unique logical volume names, one for each of
5574   the given suffixes.
5575   """
5576   results = []
5577   for val in exts:
5578     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5579     results.append("%s%s" % (new_id, val))
5580   return results
5583 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5584                          p_minor, s_minor):
5585 """Generate a drbd8 device complete with its children.
5588 port = lu.cfg.AllocatePort()
5589 vgname = lu.cfg.GetVGName()
5590 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5591 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5592 logical_id=(vgname, names[0]))
5593 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5594 logical_id=(vgname, names[1]))
5595 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5596                           logical_id=(primary, secondary, port,
5597                                       p_minor, s_minor,
5598                                       shared_secret),
5599                           children=[dev_data, dev_meta],
5600                           iv_name=iv_name)
5601   return drbd_dev
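# Resulting device tree for a single disk (sizes illustrative): the DRBD8
# device mirrors a data LV and carries a fixed 128 MiB metadata LV:
#
#   LD_DRBD8 (size=1024)
#     +- LD_LV data (size=1024)
#     +- LD_LV meta (size=128)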
5604 def _GenerateDiskTemplate(lu, template_name,
5605 instance_name, primary_node,
5606 secondary_nodes, disk_info,
5607                           file_storage_dir, file_driver,
5608                           base_index):
5609 """Generate the entire disk layout for a given template type.
5612 #TODO: compute space requirements
5614 vgname = lu.cfg.GetVGName()
5615   disk_count = len(disk_info)
5616   disks = []
5617   if template_name == constants.DT_DISKLESS:
5618     pass
5619 elif template_name == constants.DT_PLAIN:
5620 if len(secondary_nodes) != 0:
5621 raise errors.ProgrammerError("Wrong template configuration")
5623 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5624 for i in range(disk_count)])
5625 for idx, disk in enumerate(disk_info):
5626 disk_index = idx + base_index
5627 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5628 logical_id=(vgname, names[idx]),
5629                               iv_name="disk/%d" % disk_index,
5630                               mode=disk["mode"])
5631       disks.append(disk_dev)
5632 elif template_name == constants.DT_DRBD8:
5633 if len(secondary_nodes) != 1:
5634 raise errors.ProgrammerError("Wrong template configuration")
5635 remote_node = secondary_nodes[0]
5636 minors = lu.cfg.AllocateDRBDMinor(
5637 [primary_node, remote_node] * len(disk_info), instance_name)
5639     names = []
5640     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5641 for i in range(disk_count)]):
5642 names.append(lv_prefix + "_data")
5643 names.append(lv_prefix + "_meta")
5644 for idx, disk in enumerate(disk_info):
5645 disk_index = idx + base_index
5646 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5647 disk["size"], names[idx*2:idx*2+2],
5648 "disk/%d" % disk_index,
5649 minors[idx*2], minors[idx*2+1])
5650 disk_dev.mode = disk["mode"]
5651 disks.append(disk_dev)
5652 elif template_name == constants.DT_FILE:
5653 if len(secondary_nodes) != 0:
5654 raise errors.ProgrammerError("Wrong template configuration")
5656 for idx, disk in enumerate(disk_info):
5657 disk_index = idx + base_index
5658 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5659 iv_name="disk/%d" % disk_index,
5660 logical_id=(file_driver,
5661 "%s/disk%d" % (file_storage_dir,
5664 disks.append(disk_dev)
5666 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5670 def _GetInstanceInfoText(instance):
5671 """Compute that text that should be added to the disk's metadata.
5674 return "originstname+%s" % instance.name
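# Example (hypothetical instance name): an instance named
# "inst1.example.com" gets the LVM tag "originstname+inst1.example.com".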
5677 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5678 """Create all disks for an instance.
5680 This abstracts away some work from AddInstance.
5682 @type lu: L{LogicalUnit}
5683 @param lu: the logical unit on whose behalf we execute
5684 @type instance: L{objects.Instance}
5685 @param instance: the instance whose disks we should create
5687 @param to_skip: list of indices to skip
5688 @type target_node: string
5689 @param target_node: if passed, overrides the target node for creation
5691 @return: the success of the creation
5694 info = _GetInstanceInfoText(instance)
5695 if target_node is None:
5696 pnode = instance.primary_node
5697     all_nodes = instance.all_nodes
5698   else:
5699     pnode = target_node
5700     all_nodes = [pnode]
5702 if instance.disk_template == constants.DT_FILE:
5703 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5704 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5706 result.Raise("Failed to create directory '%s' on"
5707 " node %s" % (file_storage_dir, pnode))
5709 # Note: this needs to be kept in sync with adding of disks in
5710 # LUSetInstanceParams
5711 for idx, device in enumerate(instance.disks):
5712     if to_skip and idx in to_skip:
5713       continue
5714 logging.info("Creating volume %s for instance %s",
5715 device.iv_name, instance.name)
5717 for node in all_nodes:
5718 f_create = node == pnode
5719 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5722 def _RemoveDisks(lu, instance, target_node=None):
5723 """Remove all disks for an instance.
5725 This abstracts away some work from `AddInstance()` and
5726 `RemoveInstance()`. Note that in case some of the devices couldn't
5727 be removed, the removal will continue with the other ones (compare
5728 with `_CreateDisks()`).
5730 @type lu: L{LogicalUnit}
5731 @param lu: the logical unit on whose behalf we execute
5732 @type instance: L{objects.Instance}
5733 @param instance: the instance whose disks we should remove
5734 @type target_node: string
5735 @param target_node: used to override the node on which to remove the disks
5737 @return: the success of the removal
5740   logging.info("Removing block devices for instance %s", instance.name)
5742   all_result = True
5743   for device in instance.disks:
5744     if target_node:
5745       edata = [(target_node, device)]
5746     else:
5747       edata = device.ComputeNodeTree(instance.primary_node)
5748 for node, disk in edata:
5749 lu.cfg.SetDiskID(disk, node)
5750       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5751       if msg:
5752         lu.LogWarning("Could not remove block device %s on node %s,"
5753                       " continuing anyway: %s", device.iv_name, node, msg)
5754         all_result = False
5756 if instance.disk_template == constants.DT_FILE:
5757     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5758     if target_node:
5759       tgt = target_node
5760     else:
5761       tgt = instance.primary_node
5762     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5763     if result.fail_msg:
5764       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5765                     file_storage_dir, instance.primary_node, result.fail_msg)
5766       all_result = False
5768   return all_result
5771 def _ComputeDiskSize(disk_template, disks):
5772 """Compute disk size requirements in the volume group
5775 # Required free disk space as a function of disk and swap space
5776 req_size_dict = {
5777 constants.DT_DISKLESS: None,
5778 constants.DT_PLAIN: sum(d["size"] for d in disks),
5779 # 128 MB are added for drbd metadata for each disk
5780 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5781 constants.DT_FILE: None,
5782 }
5784 if disk_template not in req_size_dict:
5785 raise errors.ProgrammerError("Disk template '%s' size requirement"
5786 " is unknown" % disk_template)
5788 return req_size_dict[disk_template]
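# A minimal sketch of the computation above (hypothetical two-disk spec,
# sizes in MB):
#
#   _ComputeDiskSize(constants.DT_PLAIN, [{"size": 512}, {"size": 1024}])
#   # -> 1536 (plain LVs need exactly the sum of the disk sizes)
#   _ComputeDiskSize(constants.DT_DRBD8, [{"size": 512}, {"size": 1024}])
#   # -> 1792 (128 MB of DRBD metadata added per disk)
#   _ComputeDiskSize(constants.DT_DISKLESS, [])
#   # -> None (no volume group space required)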
5791 def _CheckHVParams(lu, nodenames, hvname, hvparams):
5792 """Hypervisor parameter validation.
5794 This function abstracts the hypervisor parameter validation to be
5795 used in both instance create and instance modify.
5797 @type lu: L{LogicalUnit}
5798 @param lu: the logical unit for which we check
5799 @type nodenames: list
5800 @param nodenames: the list of nodes on which we should check
5801 @type hvname: string
5802 @param hvname: the name of the hypervisor we should use
5803 @type hvparams: dict
5804 @param hvparams: the parameters which we need to check
5805 @raise errors.OpPrereqError: if the parameters are not valid
5808 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5809 hvname,
5810 hvparams)
5811 for node in nodenames:
5815 info.Raise("Hypervisor parameter validation failed on node %s" % node)
5818 class LUCreateInstance(LogicalUnit):
5819 """Create an instance.
5822 HPATH = "instance-add"
5823 HTYPE = constants.HTYPE_INSTANCE
5824 _OP_REQP = ["instance_name", "disks", "disk_template",
5826 "wait_for_sync", "ip_check", "nics",
5827 "hvparams", "beparams"]
5830 def CheckArguments(self):
5834 # set optional parameters to None if they don't exist
5835 for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5836 if not hasattr(self.op, attr):
5837 setattr(self.op, attr, None)
5839 # do not require name_check to ease forward/backward compatibility
5841 if not hasattr(self.op, "name_check"):
5842 self.op.name_check = True
5843 # validate/normalize the instance name
5844 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
5845 if self.op.ip_check and not self.op.name_check:
5846 # TODO: make the ip check more flexible and not depend on the name check
5847 raise errors.OpPrereqError("Cannot do ip checks without a name check",
5849 if (self.op.disk_template == constants.DT_FILE and
5850 not constants.ENABLE_FILE_STORAGE):
5851 raise errors.OpPrereqError("File storage disabled at configure time",
5853 # check disk information: either all adopt, or no adopt
5854 has_adopt = has_no_adopt = False
5855 for disk in self.op.disks:
5856 if "adopt" in disk:
5857 has_adopt = True
5858 else:
5859 has_no_adopt = True
5860 if has_adopt and has_no_adopt:
5861 raise errors.OpPrereqError("Either all disks have are adoped or none is",
5864 if self.op.disk_template != constants.DT_PLAIN:
5865 raise errors.OpPrereqError("Disk adoption is only supported for the"
5866 " 'plain' disk template",
5868 if self.op.iallocator is not None:
5869 raise errors.OpPrereqError("Disk adoption not allowed with an"
5870 " iallocator script", errors.ECODE_INVAL)
5871 if self.op.mode == constants.INSTANCE_IMPORT:
5872 raise errors.OpPrereqError("Disk adoption not allowed for"
5873 " instance import", errors.ECODE_INVAL)
5875 self.adopt_disks = has_adopt
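# Illustrative example (hypothetical values) of the two disk-specification
# styles distinguished above; mixing them in one opcode is rejected:
#
#   disks=[{"size": 1024}, {"size": 2048}]        # all disks newly created
#   disks=[{"adopt": "lv1"}, {"adopt": "lv2"}]    # all disks adopted LVs
#
# Adoption additionally requires the 'plain' disk template, no iallocator
# and no import mode, as checked above.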
5877 def ExpandNames(self):
5878 """ExpandNames for CreateInstance.
5880 Figure out the right locks for instance creation.
5883 self.needed_locks = {}
5885 # cheap checks, mostly valid constants given
5887 # verify creation mode
5888 if self.op.mode not in (constants.INSTANCE_CREATE,
5889 constants.INSTANCE_IMPORT):
5890 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5891 self.op.mode, errors.ECODE_INVAL)
5893 # disk template and mirror node verification
5894 _CheckDiskTemplate(self.op.disk_template)
5896 if self.op.hypervisor is None:
5897 self.op.hypervisor = self.cfg.GetHypervisorType()
5899 cluster = self.cfg.GetClusterInfo()
5900 enabled_hvs = cluster.enabled_hypervisors
5901 if self.op.hypervisor not in enabled_hvs:
5902 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5903 " cluster (%s)" % (self.op.hypervisor,
5904 ",".join(enabled_hvs)),
5907 # check hypervisor parameter syntax (locally)
5908 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5909 filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5910 self.op.hvparams)
5911 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5912 hv_type.CheckParameterSyntax(filled_hvp)
5913 self.hv_full = filled_hvp
5914 # check that we don't specify global parameters on an instance
5915 _CheckGlobalHvParams(self.op.hvparams)
5917 # fill and remember the beparams dict
5918 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5919 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5920 self.op.beparams)
5922 #### instance parameters check
5924 # instance name verification
5925 if self.op.name_check:
5926 hostname1 = utils.GetHostInfo(self.op.instance_name)
5927 self.op.instance_name = instance_name = hostname1.name
5928 # used in CheckPrereq for ip ping check
5929 self.check_ip = hostname1.ip
5930 else:
5931 instance_name = self.op.instance_name
5932 self.check_ip = None
5934 # this is just a preventive check, but someone might still add this
5935 # instance in the meantime, and creation will fail at lock-add time
5936 if instance_name in self.cfg.GetInstanceList():
5937 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5938 instance_name, errors.ECODE_EXISTS)
5940 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5942 # NIC buildup
5943 self.nics = []
5944 for idx, nic in enumerate(self.op.nics):
5945 nic_mode_req = nic.get("mode", None)
5946 nic_mode = nic_mode_req
5947 if nic_mode is None:
5948 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5950 # in routed mode, for the first nic, the default ip is 'auto'
5951 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5952 default_ip_mode = constants.VALUE_AUTO
5953 else:
5954 default_ip_mode = constants.VALUE_NONE
5956 # ip validity checks
5957 ip = nic.get("ip", default_ip_mode)
5958 if ip is None or ip.lower() == constants.VALUE_NONE:
5959 nic_ip = None
5960 elif ip.lower() == constants.VALUE_AUTO:
5961 if not self.op.name_check:
5962 raise errors.OpPrereqError("IP address set to auto but name checks"
5963 " have been skipped. Aborting.",
5965 nic_ip = hostname1.ip
5966 else:
5967 if not utils.IsValidIP(ip):
5968 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5969 " like a valid IP" % ip,
5973 # TODO: check the ip address for uniqueness
5974 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5975 raise errors.OpPrereqError("Routed nic mode requires an ip address",
5978 # MAC address verification
5979 mac = nic.get("mac", constants.VALUE_AUTO)
5980 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5981 mac = utils.NormalizeAndValidateMac(mac)
5983 try:
5984 self.cfg.ReserveMAC(mac, self.proc.GetECId())
5985 except errors.ReservationError:
5986 raise errors.OpPrereqError("MAC address %s already in use"
5987 " in cluster" % mac,
5988 errors.ECODE_NOTUNIQUE)
5990 # bridge verification
5991 bridge = nic.get("bridge", None)
5992 link = nic.get("link", None)
5994 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5995 " at the same time", errors.ECODE_INVAL)
5996 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5997 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6004 nicparams[constants.NIC_MODE] = nic_mode_req
6006 nicparams[constants.NIC_LINK] = link
6008 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6009 nicparams)
6010 objects.NIC.CheckParameterSyntax(check_params)
6011 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
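# Illustrative nic specification (hypothetical values) as consumed by the
# loop above; every key is optional and cluster defaults fill the gaps:
#
#   nics=[{"mode": constants.NIC_MODE_BRIDGED, "link": "br0",
#          "mac": constants.VALUE_AUTO, "ip": None}]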
6013 # disk checks/pre-build
6014 self.disks = []
6015 for disk in self.op.disks:
6016 mode = disk.get("mode", constants.DISK_RDWR)
6017 if mode not in constants.DISK_ACCESS_SET:
6018 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6019 mode, errors.ECODE_INVAL)
6020 size = disk.get("size", None)
6022 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6023 try:
6024 size = int(size)
6025 except (TypeError, ValueError):
6026 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6027 errors.ECODE_INVAL)
6028 new_disk = {"size": size, "mode": mode}
6030 new_disk["adopt"] = disk["adopt"]
6031 self.disks.append(new_disk)
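# After this loop each element of self.disks is a normalized dict such as
# {"size": 1024, "mode": constants.DISK_RDWR} (plus an "adopt" key when LV
# adoption was requested); sizes are guaranteed to be ints at this point.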
6033 # file storage checks
6034 if (self.op.file_driver and
6035 not self.op.file_driver in constants.FILE_DRIVER):
6036 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6037 self.op.file_driver, errors.ECODE_INVAL)
6039 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6040 raise errors.OpPrereqError("File storage directory path not absolute",
6043 ### Node/iallocator related checks
6044 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6045 raise errors.OpPrereqError("One and only one of iallocator and primary"
6046 " node must be given",
6049 if self.op.iallocator:
6050 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6051 else:
6052 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6053 nodelist = [self.op.pnode]
6054 if self.op.snode is not None:
6055 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6056 nodelist.append(self.op.snode)
6057 self.needed_locks[locking.LEVEL_NODE] = nodelist
6059 # in case of import lock the source node too
6060 if self.op.mode == constants.INSTANCE_IMPORT:
6061 src_node = getattr(self.op, "src_node", None)
6062 src_path = getattr(self.op, "src_path", None)
6064 if src_path is None:
6065 self.op.src_path = src_path = self.op.instance_name
6067 if src_node is None:
6068 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6069 self.op.src_node = None
6070 if os.path.isabs(src_path):
6071 raise errors.OpPrereqError("Importing an instance from an absolute"
6072 " path requires a source node option.",
6075 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6076 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6077 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6078 if not os.path.isabs(src_path):
6079 self.op.src_path = src_path = \
6080 utils.PathJoin(constants.EXPORT_DIR, src_path)
6082 # On import force_variant must be True, because if we forced it at
6083 # initial install, our only chance when importing it back is that it
6084 # works again!
6085 self.op.force_variant = True
6087 else: # INSTANCE_CREATE
6088 if getattr(self.op, "os_type", None) is None:
6089 raise errors.OpPrereqError("No guest OS specified",
6091 self.op.force_variant = getattr(self.op, "force_variant", False)
6093 def _RunAllocator(self):
6094 """Run the allocator based on input opcode.
6097 nics = [n.ToDict() for n in self.nics]
6098 ial = IAllocator(self.cfg, self.rpc,
6099 mode=constants.IALLOCATOR_MODE_ALLOC,
6100 name=self.op.instance_name,
6101 disk_template=self.op.disk_template,
6102 tags=[],
6103 os=self.op.os_type,
6104 vcpus=self.be_full[constants.BE_VCPUS],
6105 mem_size=self.be_full[constants.BE_MEMORY],
6106 disks=self.disks,
6107 nics=nics,
6108 hypervisor=self.op.hypervisor,
6109 )
6111 ial.Run(self.op.iallocator)
6114 raise errors.OpPrereqError("Can't compute nodes using"
6115 " iallocator '%s': %s" %
6116 (self.op.iallocator, ial.info),
6117 errors.ECODE_NORES)
6118 if len(ial.result) != ial.required_nodes:
6119 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6120 " of nodes (%s), required %s" %
6121 (self.op.iallocator, len(ial.result),
6122 ial.required_nodes), errors.ECODE_FAULT)
6123 self.op.pnode = ial.result[0]
6124 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6125 self.op.instance_name, self.op.iallocator,
6126 utils.CommaJoin(ial.result))
6127 if ial.required_nodes == 2:
6128 self.op.snode = ial.result[1]
6130 def BuildHooksEnv(self):
6131 """Build hooks env.
6133 This runs on master, primary and secondary nodes of the instance.
6135 """
6136 env = {
6137 "ADD_MODE": self.op.mode,
6138 }
6139 if self.op.mode == constants.INSTANCE_IMPORT:
6140 env["SRC_NODE"] = self.op.src_node
6141 env["SRC_PATH"] = self.op.src_path
6142 env["SRC_IMAGES"] = self.src_images
6144 env.update(_BuildInstanceHookEnv(
6145 name=self.op.instance_name,
6146 primary_node=self.op.pnode,
6147 secondary_nodes=self.secondaries,
6148 status=self.op.start,
6149 os_type=self.op.os_type,
6150 memory=self.be_full[constants.BE_MEMORY],
6151 vcpus=self.be_full[constants.BE_VCPUS],
6152 nics=_NICListToTuple(self, self.nics),
6153 disk_template=self.op.disk_template,
6154 disks=[(d["size"], d["mode"]) for d in self.disks],
6157 hypervisor_name=self.op.hypervisor,
6160 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6165 def CheckPrereq(self):
6166 """Check prerequisites.
6169 if (not self.cfg.GetVGName() and
6170 self.op.disk_template not in constants.DTS_NOT_LVM):
6171 raise errors.OpPrereqError("Cluster does not support lvm-based"
6172 " instances", errors.ECODE_STATE)
6174 if self.op.mode == constants.INSTANCE_IMPORT:
6175 src_node = self.op.src_node
6176 src_path = self.op.src_path
6178 if src_node is None:
6179 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6180 exp_list = self.rpc.call_export_list(locked_nodes)
6181 found = False
6182 for node in exp_list:
6183 if exp_list[node].fail_msg:
6184 continue
6185 if src_path in exp_list[node].payload:
6186 found = True
6187 self.op.src_node = src_node = node
6188 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6189 src_path)
6190 break
6191 if not found:
6192 raise errors.OpPrereqError("No export found for relative path %s" %
6193 src_path, errors.ECODE_INVAL)
6195 _CheckNodeOnline(self, src_node)
6196 result = self.rpc.call_export_info(src_node, src_path)
6197 result.Raise("No export or invalid export found in dir %s" % src_path)
6199 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6200 if not export_info.has_section(constants.INISECT_EXP):
6201 raise errors.ProgrammerError("Corrupted export config",
6202 errors.ECODE_ENVIRON)
6204 ei_version = export_info.get(constants.INISECT_EXP, 'version')
6205 if (int(ei_version) != constants.EXPORT_VERSION):
6206 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6207 (ei_version, constants.EXPORT_VERSION),
6208 errors.ECODE_ENVIRON)
6210 # Check that the new instance doesn't have less disks than the export
6211 instance_disks = len(self.disks)
6212 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6213 if instance_disks < export_disks:
6214 raise errors.OpPrereqError("Not enough disks to import."
6215 " (instance: %d, export: %d)" %
6216 (instance_disks, export_disks),
6217 errors.ECODE_INVAL)
6219 self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
6220 disk_images = []
6221 for idx in range(export_disks):
6222 option = 'disk%d_dump' % idx
6223 if export_info.has_option(constants.INISECT_INS, option):
6224 # FIXME: are the old os-es, disk sizes, etc. useful?
6225 export_name = export_info.get(constants.INISECT_INS, option)
6226 image = utils.PathJoin(src_path, export_name)
6227 disk_images.append(image)
6228 else:
6229 disk_images.append(False)
6231 self.src_images = disk_images
6233 old_name = export_info.get(constants.INISECT_INS, 'name')
6234 # FIXME: int() here could throw a ValueError on broken exports
6235 exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
6236 if self.op.instance_name == old_name:
6237 for idx, nic in enumerate(self.nics):
6238 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6239 nic_mac_ini = 'nic%d_mac' % idx
6240 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6242 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6244 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6245 if self.op.ip_check:
6246 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6247 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6248 (self.check_ip, self.op.instance_name),
6249 errors.ECODE_NOTUNIQUE)
6251 #### mac address generation
6252 # By generating here the mac address both the allocator and the hooks get
6253 # the real final mac address rather than the 'auto' or 'generate' value.
6254 # There is a race condition between the generation and the instance object
6255 # creation, which means that we know the mac is valid now, but we're not
6256 # sure it will be when we actually add the instance. If things go bad
6257 # adding the instance will abort because of a duplicate mac, and the
6258 # creation job will fail.
6259 for nic in self.nics:
6260 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6261 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6265 if self.op.iallocator is not None:
6266 self._RunAllocator()
6268 #### node related checks
6270 # check primary node
6271 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6272 assert self.pnode is not None, \
6273 "Cannot retrieve locked node %s" % self.op.pnode
6275 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6276 pnode.name, errors.ECODE_STATE)
6278 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6279 pnode.name, errors.ECODE_STATE)
6281 self.secondaries = []
6283 # mirror node verification
6284 if self.op.disk_template in constants.DTS_NET_MIRROR:
6285 if self.op.snode is None:
6286 raise errors.OpPrereqError("The networked disk templates need"
6287 " a mirror node", errors.ECODE_INVAL)
6288 if self.op.snode == pnode.name:
6289 raise errors.OpPrereqError("The secondary node cannot be the"
6290 " primary node.", errors.ECODE_INVAL)
6291 _CheckNodeOnline(self, self.op.snode)
6292 _CheckNodeNotDrained(self, self.op.snode)
6293 self.secondaries.append(self.op.snode)
6295 nodenames = [pnode.name] + self.secondaries
6297 req_size = _ComputeDiskSize(self.op.disk_template,
6298 self.disks)
6300 # Check lv size requirements, if not adopting
6301 if req_size is not None and not self.adopt_disks:
6302 _CheckNodesFreeDisk(self, nodenames, req_size)
6304 if self.adopt_disks: # instead, we must check the adoption data
6305 all_lvs = set([i["adopt"] for i in self.disks])
6306 if len(all_lvs) != len(self.disks):
6307 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6309 for lv_name in all_lvs:
6310 try:
6311 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6312 except errors.ReservationError:
6313 raise errors.OpPrereqError("LV named %s used by another instance" %
6314 lv_name, errors.ECODE_NOTUNIQUE)
6316 node_lvs = self.rpc.call_lv_list([pnode.name],
6317 self.cfg.GetVGName())[pnode.name]
6318 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6319 node_lvs = node_lvs.payload
6320 delta = all_lvs.difference(node_lvs.keys())
6322 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6323 utils.CommaJoin(delta),
6325 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6327 raise errors.OpPrereqError("Online logical volumes found, cannot"
6328 " adopt: %s" % utils.CommaJoin(online_lvs),
6330 # update the size of disk based on what is found
6331 for dsk in self.disks:
6332 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6334 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6337 result = self.rpc.call_os_get(pnode.name, self.op.os_type)
6338 result.Raise("OS '%s' not in supported os list for primary node %s" %
6339 (self.op.os_type, pnode.name),
6340 prereq=True, ecode=errors.ECODE_INVAL)
6341 if not self.op.force_variant:
6342 _CheckOSVariant(result.payload, self.op.os_type)
6344 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6346 # memory check on primary node
6348 _CheckNodeFreeMemory(self, self.pnode.name,
6349 "creating instance %s" % self.op.instance_name,
6350 self.be_full[constants.BE_MEMORY],
6351 self.op.hypervisor)
6353 self.dry_run_result = list(nodenames)
6355 def Exec(self, feedback_fn):
6356 """Create and add the instance to the cluster.
6359 instance = self.op.instance_name
6360 pnode_name = self.pnode.name
6362 ht_kind = self.op.hypervisor
6363 if ht_kind in constants.HTS_REQ_PORT:
6364 network_port = self.cfg.AllocatePort()
6365 else:
6366 network_port = None
6368 ##if self.op.vnc_bind_address is None:
6369 ## self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
6371 # this is needed because os.path.join does not accept None arguments
6372 if self.op.file_storage_dir is None:
6373 string_file_storage_dir = ""
6374 else:
6375 string_file_storage_dir = self.op.file_storage_dir
6377 # build the full file storage dir path
6378 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6379 string_file_storage_dir, instance)
6382 disks = _GenerateDiskTemplate(self,
6383 self.op.disk_template,
6384 instance, pnode_name,
6385 self.secondaries,
6386 self.disks,
6387 file_storage_dir,
6388 self.op.file_driver,
6389 0)
6391 iobj = objects.Instance(name=instance, os=self.op.os_type,
6392 primary_node=pnode_name,
6393 nics=self.nics, disks=disks,
6394 disk_template=self.op.disk_template,
6395 admin_up=False,
6396 network_port=network_port,
6397 beparams=self.op.beparams,
6398 hvparams=self.op.hvparams,
6399 hypervisor=self.op.hypervisor,
6400 )
6402 if self.adopt_disks:
6403 # rename LVs to the newly-generated names; we need to construct
6404 # 'fake' LV disks with the old data, plus the new unique_id
6405 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6406 rename_to = []
6407 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
6408 rename_to.append(t_dsk.logical_id)
6409 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6410 self.cfg.SetDiskID(t_dsk, pnode_name)
6411 result = self.rpc.call_blockdev_rename(pnode_name,
6412 zip(tmp_disks, rename_to))
6413 result.Raise("Failed to rename adoped LVs")
6415 feedback_fn("* creating instance disks...")
6416 try:
6417 _CreateDisks(self, iobj)
6418 except errors.OpExecError:
6419 self.LogWarning("Device creation failed, reverting...")
6420 try:
6421 _RemoveDisks(self, iobj)
6422 finally:
6423 self.cfg.ReleaseDRBDMinors(instance)
6424 raise
6426 feedback_fn("adding instance %s to cluster config" % instance)
6428 self.cfg.AddInstance(iobj, self.proc.GetECId())
6430 # Declare that we don't want to remove the instance lock anymore, as we've
6431 # added the instance to the config
6432 del self.remove_locks[locking.LEVEL_INSTANCE]
6433 # Unlock all the nodes
6434 if self.op.mode == constants.INSTANCE_IMPORT:
6435 nodes_keep = [self.op.src_node]
6436 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6437 if node != self.op.src_node]
6438 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6439 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6440 else:
6441 self.context.glm.release(locking.LEVEL_NODE)
6442 del self.acquired_locks[locking.LEVEL_NODE]
6444 if self.op.wait_for_sync:
6445 disk_abort = not _WaitForSync(self, iobj)
6446 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6447 # make sure the disks are not degraded (still sync-ing is ok)
6449 feedback_fn("* checking mirrors status")
6450 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6451 else:
6452 disk_abort = False
6454 if disk_abort:
6455 _RemoveDisks(self, iobj)
6456 self.cfg.RemoveInstance(iobj.name)
6457 # Make sure the instance lock gets removed
6458 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6459 raise errors.OpExecError("There are some degraded disks for"
6462 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6463 if self.op.mode == constants.INSTANCE_CREATE:
6464 feedback_fn("* running the instance OS create scripts...")
6465 # FIXME: pass debug option from opcode to backend
6466 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6467 self.op.debug_level)
6468 result.Raise("Could not add os for instance %s"
6469 " on node %s" % (instance, pnode_name))
6471 elif self.op.mode == constants.INSTANCE_IMPORT:
6472 feedback_fn("* running the instance OS import scripts...")
6473 src_node = self.op.src_node
6474 src_images = self.src_images
6475 cluster_name = self.cfg.GetClusterName()
6476 # FIXME: pass debug option from opcode to backend
6477 import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6478 src_node, src_images,
6479 cluster_name,
6480 self.op.debug_level)
6481 msg = import_result.fail_msg
6483 self.LogWarning("Error while importing the disk images for instance"
6484 " %s on node %s: %s" % (instance, pnode_name, msg))
6485 else:
6486 # also checked in the prereq part
6487 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6488 % self.op.mode)
6490 if self.op.start:
6491 iobj.admin_up = True
6492 self.cfg.Update(iobj, feedback_fn)
6493 logging.info("Starting instance %s on node %s", instance, pnode_name)
6494 feedback_fn("* starting instance...")
6495 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6496 result.Raise("Could not start instance")
6498 return list(iobj.all_nodes)
6501 class LUConnectConsole(NoHooksLU):
6502 """Connect to an instance's console.
6504 This is somewhat special in that it returns the command line that
6505 you need to run on the master node in order to connect to the
6506 console.
6508 """
6509 _OP_REQP = ["instance_name"]
6512 def ExpandNames(self):
6513 self._ExpandAndLockInstance()
6515 def CheckPrereq(self):
6516 """Check prerequisites.
6518 This checks that the instance is in the cluster.
6521 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6522 assert self.instance is not None, \
6523 "Cannot retrieve locked instance %s" % self.op.instance_name
6524 _CheckNodeOnline(self, self.instance.primary_node)
6526 def Exec(self, feedback_fn):
6527 """Connect to the console of an instance
6530 instance = self.instance
6531 node = instance.primary_node
6533 node_insts = self.rpc.call_instance_list([node],
6534 [instance.hypervisor])[node]
6535 node_insts.Raise("Can't get node information from %s" % node)
6537 if instance.name not in node_insts.payload:
6538 raise errors.OpExecError("Instance %s is not running." % instance.name)
6540 logging.debug("Connecting to console of %s on %s", instance.name, node)
6542 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6543 cluster = self.cfg.GetClusterInfo()
6544 # beparams and hvparams are passed separately, to avoid editing the
6545 # instance and then saving the defaults in the instance itself.
6546 hvparams = cluster.FillHV(instance)
6547 beparams = cluster.FillBE(instance)
6548 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6551 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
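# Editorial note (hedged sketch): the value returned above is a complete
# SSH command line built by SshRunner.BuildCmd; a client such as
# "gnt-instance console" is expected to exec it on the master node rather
# than have the master daemon open the console itself, roughly like
# ["ssh", ..., "node1.example.com", "xm console web1.example.com"]
# (node, instance and hypervisor command here are hypothetical).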
6554 class LUReplaceDisks(LogicalUnit):
6555 """Replace the disks of an instance.
6558 HPATH = "mirrors-replace"
6559 HTYPE = constants.HTYPE_INSTANCE
6560 _OP_REQP = ["instance_name", "mode", "disks"]
6563 def CheckArguments(self):
6564 if not hasattr(self.op, "remote_node"):
6565 self.op.remote_node = None
6566 if not hasattr(self.op, "iallocator"):
6567 self.op.iallocator = None
6568 if not hasattr(self.op, "early_release"):
6569 self.op.early_release = False
6571 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6572 self.op.iallocator)
6574 def ExpandNames(self):
6575 self._ExpandAndLockInstance()
6577 if self.op.iallocator is not None:
6578 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6580 elif self.op.remote_node is not None:
6581 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6582 self.op.remote_node = remote_node
6584 # Warning: do not remove the locking of the new secondary here
6585 # unless DRBD8.AddChildren is changed to work in parallel;
6586 # currently it doesn't since parallel invocations of
6587 # FindUnusedMinor will conflict
6588 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6589 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6591 else:
6592 self.needed_locks[locking.LEVEL_NODE] = []
6593 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6595 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6596 self.op.iallocator, self.op.remote_node,
6597 self.op.disks, False, self.op.early_release)
6599 self.tasklets = [self.replacer]
6601 def DeclareLocks(self, level):
6602 # If we're not already locking all nodes in the set we have to declare the
6603 # instance's primary/secondary nodes.
6604 if (level == locking.LEVEL_NODE and
6605 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6606 self._LockInstancesNodes()
6608 def BuildHooksEnv(self):
6609 """Build hooks env.
6611 This runs on the master, the primary and all the secondaries.
6613 """
6614 instance = self.replacer.instance
6616 "MODE": self.op.mode,
6617 "NEW_SECONDARY": self.op.remote_node,
6618 "OLD_SECONDARY": instance.secondary_nodes[0],
6620 env.update(_BuildInstanceHookEnvByObject(self, instance))
6621 nl = [
6622 self.cfg.GetMasterNode(),
6623 instance.primary_node,
6624 ]
6625 if self.op.remote_node is not None:
6626 nl.append(self.op.remote_node)
6627 return env, nl, nl
6630 class LUEvacuateNode(LogicalUnit):
6631 """Relocate the secondary instances from a node.
6634 HPATH = "node-evacuate"
6635 HTYPE = constants.HTYPE_NODE
6636 _OP_REQP = ["node_name"]
6639 def CheckArguments(self):
6640 if not hasattr(self.op, "remote_node"):
6641 self.op.remote_node = None
6642 if not hasattr(self.op, "iallocator"):
6643 self.op.iallocator = None
6644 if not hasattr(self.op, "early_release"):
6645 self.op.early_release = False
6647 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6648 self.op.remote_node,
6649 self.op.iallocator)
6651 def ExpandNames(self):
6652 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6654 self.needed_locks = {}
6656 # Declare node locks
6657 if self.op.iallocator is not None:
6658 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6660 elif self.op.remote_node is not None:
6661 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6663 # Warning: do not remove the locking of the new secondary here
6664 # unless DRBD8.AddChildren is changed to work in parallel;
6665 # currently it doesn't since parallel invocations of
6666 # FindUnusedMinor will conflict
6667 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
6668 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6671 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
6673 # Create tasklets for replacing disks for all secondary instances on this
6674 # node
6675 names = []
6676 tasklets = []
6678 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6679 logging.debug("Replacing disks for instance %s", inst.name)
6680 names.append(inst.name)
6682 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6683 self.op.iallocator, self.op.remote_node, [],
6684 True, self.op.early_release)
6685 tasklets.append(replacer)
6687 self.tasklets = tasklets
6688 self.instance_names = names
6690 # Declare instance locks
6691 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
6693 def DeclareLocks(self, level):
6694 # If we're not already locking all nodes in the set we have to declare the
6695 # instance's primary/secondary nodes.
6696 if (level == locking.LEVEL_NODE and
6697 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6698 self._LockInstancesNodes()
6700 def BuildHooksEnv(self):
6701 """Build hooks env.
6703 This runs on the master, the primary and all the secondaries.
6705 """
6706 env = {
6707 "NODE_NAME": self.op.node_name,
6708 }
6710 nl = [self.cfg.GetMasterNode()]
6712 if self.op.remote_node is not None:
6713 env["NEW_SECONDARY"] = self.op.remote_node
6714 nl.append(self.op.remote_node)
6716 return (env, nl, nl)
6719 class TLReplaceDisks(Tasklet):
6720 """Replaces disks for an instance.
6722 Note: Locking is not within the scope of this class.
6725 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6726 disks, delay_iallocator, early_release):
6727 """Initializes this class.
6730 Tasklet.__init__(self, lu)
6733 self.instance_name = instance_name
6734 self.mode = mode
6735 self.iallocator_name = iallocator_name
6736 self.remote_node = remote_node
6737 self.disks = disks
6738 self.delay_iallocator = delay_iallocator
6739 self.early_release = early_release
6742 self.instance = None
6743 self.new_node = None
6744 self.target_node = None
6745 self.other_node = None
6746 self.remote_node_info = None
6747 self.node_secondary_ip = None
6749 @staticmethod
6750 def CheckArguments(mode, remote_node, iallocator):
6751 """Helper function for users of this class.
6754 # check for valid parameter combination
6755 if mode == constants.REPLACE_DISK_CHG:
6756 if remote_node is None and iallocator is None:
6757 raise errors.OpPrereqError("When changing the secondary either an"
6758 " iallocator script must be used or the"
6759 " new node given", errors.ECODE_INVAL)
6761 if remote_node is not None and iallocator is not None:
6762 raise errors.OpPrereqError("Give either the iallocator or the new"
6763 " secondary, not both", errors.ECODE_INVAL)
6765 elif remote_node is not None or iallocator is not None:
6766 # Not replacing the secondary
6767 raise errors.OpPrereqError("The iallocator and new node options can"
6768 " only be used when changing the"
6769 " secondary node", errors.ECODE_INVAL)
6771 @staticmethod
6772 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6773 """Compute a new secondary node using an IAllocator.
6776 ial = IAllocator(lu.cfg, lu.rpc,
6777 mode=constants.IALLOCATOR_MODE_RELOC,
6778 name=instance_name,
6779 relocate_from=relocate_from)
6781 ial.Run(iallocator_name)
6784 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6785 " %s" % (iallocator_name, ial.info),
6788 if len(ial.result) != ial.required_nodes:
6789 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6790 " of nodes (%s), required %s" %
6791 (iallocator_name,
6792 len(ial.result), ial.required_nodes),
6793 errors.ECODE_FAULT)
6795 remote_node_name = ial.result[0]
6797 lu.LogInfo("Selected new secondary for instance '%s': %s",
6798 instance_name, remote_node_name)
6800 return remote_node_name
6802 def _FindFaultyDisks(self, node_name):
6803 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
6804 node_name, True)
6806 def CheckPrereq(self):
6807 """Check prerequisites.
6809 This checks that the instance is in the cluster.
6812 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
6813 assert instance is not None, \
6814 "Cannot retrieve locked instance %s" % self.instance_name
6816 if instance.disk_template != constants.DT_DRBD8:
6817 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
6818 " instances", errors.ECODE_INVAL)
6820 if len(instance.secondary_nodes) != 1:
6821 raise errors.OpPrereqError("The instance has a strange layout,"
6822 " expected one secondary but found %d" %
6823 len(instance.secondary_nodes),
6824 errors.ECODE_FAULT)
6826 if not self.delay_iallocator:
6827 self._CheckPrereq2()
6829 def _CheckPrereq2(self):
6830 """Check prerequisites, second part.
6832 This function should always be part of CheckPrereq. It was separated and is
6833 now called from Exec because during node evacuation iallocator was only
6834 called with an unmodified cluster model, not taking planned changes into
6835 account.
6837 """
6838 instance = self.instance
6839 secondary_node = instance.secondary_nodes[0]
6841 if self.iallocator_name is None:
6842 remote_node = self.remote_node
6843 else:
6844 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
6845 instance.name, instance.secondary_nodes)
6847 if remote_node is not None:
6848 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
6849 assert self.remote_node_info is not None, \
6850 "Cannot retrieve locked node %s" % remote_node
6851 else:
6852 self.remote_node_info = None
6854 if remote_node == self.instance.primary_node:
6855 raise errors.OpPrereqError("The specified node is the primary node of"
6856 " the instance.", errors.ECODE_INVAL)
6858 if remote_node == secondary_node:
6859 raise errors.OpPrereqError("The specified node is already the"
6860 " secondary node of the instance.",
6863 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
6864 constants.REPLACE_DISK_CHG):
6865 raise errors.OpPrereqError("Cannot specify disks to be replaced",
6868 if self.mode == constants.REPLACE_DISK_AUTO:
6869 faulty_primary = self._FindFaultyDisks(instance.primary_node)
6870 faulty_secondary = self._FindFaultyDisks(secondary_node)
6872 if faulty_primary and faulty_secondary:
6873 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
6874 " one node and can not be repaired"
6875 " automatically" % self.instance_name,
6879 self.disks = faulty_primary
6880 self.target_node = instance.primary_node
6881 self.other_node = secondary_node
6882 check_nodes = [self.target_node, self.other_node]
6883 elif faulty_secondary:
6884 self.disks = faulty_secondary
6885 self.target_node = secondary_node
6886 self.other_node = instance.primary_node
6887 check_nodes = [self.target_node, self.other_node]
6888 else:
6889 self.disks = []
6890 check_nodes = []
6892 else:
6893 # Non-automatic modes
6894 if self.mode == constants.REPLACE_DISK_PRI:
6895 self.target_node = instance.primary_node
6896 self.other_node = secondary_node
6897 check_nodes = [self.target_node, self.other_node]
6899 elif self.mode == constants.REPLACE_DISK_SEC:
6900 self.target_node = secondary_node
6901 self.other_node = instance.primary_node
6902 check_nodes = [self.target_node, self.other_node]
6904 elif self.mode == constants.REPLACE_DISK_CHG:
6905 self.new_node = remote_node
6906 self.other_node = instance.primary_node
6907 self.target_node = secondary_node
6908 check_nodes = [self.new_node, self.other_node]
6910 _CheckNodeNotDrained(self.lu, remote_node)
6912 old_node_info = self.cfg.GetNodeInfo(secondary_node)
6913 assert old_node_info is not None
6914 if old_node_info.offline and not self.early_release:
6915 # doesn't make sense to delay the release
6916 self.early_release = True
6917 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
6918 " early-release mode", secondary_node)
6921 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
6924 # If not specified all disks should be replaced
6925 if not self.disks:
6926 self.disks = range(len(self.instance.disks))
6928 for node in check_nodes:
6929 _CheckNodeOnline(self.lu, node)
6931 # Check whether disks are valid
6932 for disk_idx in self.disks:
6933 instance.FindDisk(disk_idx)
6935 # Get secondary node IP addresses
6937 node_2nd_ip = {}
6938 for node_name in [self.target_node, self.other_node, self.new_node]:
6939 if node_name is not None:
6940 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
6942 self.node_secondary_ip = node_2nd_ip
6944 def Exec(self, feedback_fn):
6945 """Execute disk replacement.
6947 This dispatches the disk replacement to the appropriate handler.
6950 if self.delay_iallocator:
6951 self._CheckPrereq2()
6954 feedback_fn("No disks need replacement")
6957 feedback_fn("Replacing disk(s) %s for %s" %
6958 (utils.CommaJoin(self.disks), self.instance.name))
6960 activate_disks = (not self.instance.admin_up)
6962 # Activate the instance disks if we're replacing them on a down instance
6963 if activate_disks:
6964 _StartInstanceDisks(self.lu, self.instance, True)
6966 try:
6967 # Should we replace the secondary node?
6968 if self.new_node is not None:
6969 fn = self._ExecDrbd8Secondary
6970 else:
6971 fn = self._ExecDrbd8DiskOnly
6973 return fn(feedback_fn)
6975 finally:
6976 # Deactivate the instance disks if we're replacing them on a
6977 # down instance
6978 if activate_disks:
6979 _SafeShutdownInstanceDisks(self.lu, self.instance)
6981 def _CheckVolumeGroup(self, nodes):
6982 self.lu.LogInfo("Checking volume groups")
6984 vgname = self.cfg.GetVGName()
6986 # Make sure volume group exists on all involved nodes
6987 results = self.rpc.call_vg_list(nodes)
6989 raise errors.OpExecError("Can't list volume groups on the nodes")
6993 res.Raise("Error checking node %s" % node)
6994 if vgname not in res.payload:
6995 raise errors.OpExecError("Volume group '%s' not found on node %s" %
6998 def _CheckDisksExistence(self, nodes):
6999 # Check disk existence
7000 for idx, dev in enumerate(self.instance.disks):
7001 if idx not in self.disks:
7005 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7006 self.cfg.SetDiskID(dev, node)
7008 result = self.rpc.call_blockdev_find(node, dev)
7010 msg = result.fail_msg
7011 if msg or not result.payload:
7013 msg = "disk not found"
7014 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7017 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7018 for idx, dev in enumerate(self.instance.disks):
7019 if idx not in self.disks:
7022 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7025 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7026 ldisk=ldisk):
7027 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7028 " replace disks for instance %s" %
7029 (node_name, self.instance.name))
7031 def _CreateNewStorage(self, node_name):
7032 vgname = self.cfg.GetVGName()
7034 iv_names = {}
7035 for idx, dev in enumerate(self.instance.disks):
7036 if idx not in self.disks:
7039 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7041 self.cfg.SetDiskID(dev, node_name)
7043 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7044 names = _GenerateUniqueNames(self.lu, lv_names)
7046 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7047 logical_id=(vgname, names[0]))
7048 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7049 logical_id=(vgname, names[1]))
7051 new_lvs = [lv_data, lv_meta]
7052 old_lvs = dev.children
7053 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7055 # we pass force_create=True to force the LVM creation
7056 for new_lv in new_lvs:
7057 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7058 _GetInstanceInfoText(self.instance), False)
7060 return iv_names
7062 def _CheckDevices(self, node_name, iv_names):
7063 for name, (dev, _, _) in iv_names.iteritems():
7064 self.cfg.SetDiskID(dev, node_name)
7066 result = self.rpc.call_blockdev_find(node_name, dev)
7068 msg = result.fail_msg
7069 if msg or not result.payload:
7071 msg = "disk not found"
7072 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7075 if result.payload.is_degraded:
7076 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7078 def _RemoveOldStorage(self, node_name, iv_names):
7079 for name, (_, old_lvs, _) in iv_names.iteritems():
7080 self.lu.LogInfo("Remove logical volumes for %s" % name)
7082 for lv in old_lvs:
7083 self.cfg.SetDiskID(lv, node_name)
7085 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7087 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7088 hint="remove unused LVs manually")
7090 def _ReleaseNodeLock(self, node_name):
7091 """Releases the lock for a given node."""
7092 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7094 def _ExecDrbd8DiskOnly(self, feedback_fn):
7095 """Replace a disk on the primary or secondary for DRBD 8.
7097 The algorithm for replace is quite complicated:
7099 1. for each disk to be replaced:
7101 1. create new LVs on the target node with unique names
7102 1. detach old LVs from the drbd device
7103 1. rename old LVs to name_replaced.<time_t>
7104 1. rename new LVs to old LVs
7105 1. attach the new LVs (with the old names now) to the drbd device
7107 1. wait for sync across all devices
7109 1. for each modified disk:
7111 1. remove old LVs (which have the name name_replaced.<time_t>)
7113 Failures are not very well handled.
7115 """
7116 steps_total = 6
7118 # Step: check device activation
7119 self.lu.LogStep(1, steps_total, "Check device existence")
7120 self._CheckDisksExistence([self.other_node, self.target_node])
7121 self._CheckVolumeGroup([self.target_node, self.other_node])
7123 # Step: check other node consistency
7124 self.lu.LogStep(2, steps_total, "Check peer consistency")
7125 self._CheckDisksConsistency(self.other_node,
7126 self.other_node == self.instance.primary_node,
7127 False)
7129 # Step: create new storage
7130 self.lu.LogStep(3, steps_total, "Allocate new storage")
7131 iv_names = self._CreateNewStorage(self.target_node)
7133 # Step: for each lv, detach+rename*2+attach
7134 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7135 for dev, old_lvs, new_lvs in iv_names.itervalues():
7136 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7138 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7139 old_lvs)
7140 result.Raise("Can't detach drbd from local storage on node"
7141 " %s for device %s" % (self.target_node, dev.iv_name))
7143 #cfg.Update(instance)
7145 # ok, we created the new LVs, so now we know we have the needed
7146 # storage; as such, we proceed on the target node to rename
7147 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7148 # using the assumption that logical_id == physical_id (which in
7149 # turn is the unique_id on that node)
7151 # FIXME(iustin): use a better name for the replaced LVs
7152 temp_suffix = int(time.time())
7153 ren_fn = lambda d, suff: (d.physical_id[0],
7154 d.physical_id[1] + "_replaced-%s" % suff)
7156 # Build the rename list based on what LVs exist on the node
7157 rename_old_to_new = []
7158 for to_ren in old_lvs:
7159 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7160 if not result.fail_msg and result.payload:
7162 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7164 self.lu.LogInfo("Renaming the old LVs on the target node")
7165 result = self.rpc.call_blockdev_rename(self.target_node,
7166 rename_old_to_new)
7167 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7169 # Now we rename the new LVs to the old LVs
7170 self.lu.LogInfo("Renaming the new LVs on the target node")
7171 rename_new_to_old = [(new, old.physical_id)
7172 for old, new in zip(old_lvs, new_lvs)]
7173 result = self.rpc.call_blockdev_rename(self.target_node,
7174 rename_new_to_old)
7175 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7177 for old, new in zip(old_lvs, new_lvs):
7178 new.logical_id = old.logical_id
7179 self.cfg.SetDiskID(new, self.target_node)
7181 for disk in old_lvs:
7182 disk.logical_id = ren_fn(disk, temp_suffix)
7183 self.cfg.SetDiskID(disk, self.target_node)
7185 # Now that the new lvs have the old name, we can add them to the device
7186 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7187 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7188 new_lvs)
7189 msg = result.fail_msg
7190 if msg:
7191 for new_lv in new_lvs:
7192 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7193 new_lv).fail_msg
7194 if msg2:
7195 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7196 hint=("cleanup manually the unused logical"
7198 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7200 dev.children = new_lvs
7202 self.cfg.Update(self.instance, feedback_fn)
7204 cstep = 5
7205 if self.early_release:
7206 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7207 cstep += 1
7208 self._RemoveOldStorage(self.target_node, iv_names)
7209 # WARNING: we release both node locks here, do not do other RPCs
7210 # than WaitForSync to the primary node
7211 self._ReleaseNodeLock([self.target_node, self.other_node])
7213 # Wait for sync
7214 # This can fail as the old devices are degraded and _WaitForSync
7215 # does a combined result over all disks, so we don't check its return value
7216 self.lu.LogStep(cstep, steps_total, "Sync devices")
7217 cstep += 1
7218 _WaitForSync(self.lu, self.instance)
7220 # Check all devices manually
7221 self._CheckDevices(self.instance.primary_node, iv_names)
7223 # Step: remove old storage
7224 if not self.early_release:
7225 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7226 cstep += 1
7227 self._RemoveOldStorage(self.target_node, iv_names)
7229 def _ExecDrbd8Secondary(self, feedback_fn):
7230 """Replace the secondary node for DRBD 8.
7232 The algorithm for replace is quite complicated:
7233 - for all disks of the instance:
7234 - create new LVs on the new node with same names
7235 - shutdown the drbd device on the old secondary
7236 - disconnect the drbd network on the primary
7237 - create the drbd device on the new secondary
7238 - network attach the drbd on the primary, using an artifice:
7239 the drbd code for Attach() will connect to the network if it
7240 finds a device which is connected to the good local disks but
7241 not network enabled
7242 - wait for sync across all devices
7243 - remove all disks from the old secondary
7245 Failures are not very well handled.
7247 """
7248 steps_total = 6
7250 # Step: check device activation
7251 self.lu.LogStep(1, steps_total, "Check device existence")
7252 self._CheckDisksExistence([self.instance.primary_node])
7253 self._CheckVolumeGroup([self.instance.primary_node])
7255 # Step: check other node consistency
7256 self.lu.LogStep(2, steps_total, "Check peer consistency")
7257 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7259 # Step: create new storage
7260 self.lu.LogStep(3, steps_total, "Allocate new storage")
7261 for idx, dev in enumerate(self.instance.disks):
7262 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7263 (self.new_node, idx))
7264 # we pass force_create=True to force LVM creation
7265 for new_lv in dev.children:
7266 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7267 _GetInstanceInfoText(self.instance), False)
7269 # Step 4: dbrd minors and drbd setups changes
7270 # after this, we must manually remove the drbd minors on both the
7271 # error and the success paths
7272 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7273 minors = self.cfg.AllocateDRBDMinor([self.new_node
7274 for dev in self.instance.disks],
7275 self.instance.name)
7276 logging.debug("Allocated minors %r", minors)
7278 iv_names = {}
7279 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7280 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7281 (self.new_node, idx))
7282 # create new devices on new_node; note that we create two IDs:
7283 # one without port, so the drbd will be activated without
7284 # networking information on the new node at this stage, and one
7285 # with network, for the latter activation in step 4
7286 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7287 if self.instance.primary_node == o_node1:
7288 p_minor = o_minor1
7289 else:
7290 assert self.instance.primary_node == o_node2, "Three-node instance?"
7291 p_minor = o_minor2
7293 new_alone_id = (self.instance.primary_node, self.new_node, None,
7294 p_minor, new_minor, o_secret)
7295 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7296 p_minor, new_minor, o_secret)
7298 iv_names[idx] = (dev, dev.children, new_net_id)
7299 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7301 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7302 logical_id=new_alone_id,
7303 children=dev.children,
7304 size=dev.size)
7305 try:
7306 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7307 _GetInstanceInfoText(self.instance), False)
7308 except errors.GenericError:
7309 self.cfg.ReleaseDRBDMinors(self.instance.name)
7310 raise
7312 # We have new devices, shutdown the drbd on the old secondary
7313 for idx, dev in enumerate(self.instance.disks):
7314 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7315 self.cfg.SetDiskID(dev, self.target_node)
7316 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7318 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7319 "node: %s" % (idx, msg),
7320 hint=("Please cleanup this device manually as"
7321 " soon as possible"))
7323 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7324 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7325 self.node_secondary_ip,
7326 self.instance.disks)\
7327 [self.instance.primary_node]
7329 msg = result.fail_msg
7330 if msg:
7331 # detaches didn't succeed (unlikely)
7332 self.cfg.ReleaseDRBDMinors(self.instance.name)
7333 raise errors.OpExecError("Can't detach the disks from the network on"
7334 " old node: %s" % (msg,))
7336 # if we managed to detach at least one, we update all the disks of
7337 # the instance to point to the new secondary
7338 self.lu.LogInfo("Updating instance configuration")
7339 for dev, _, new_logical_id in iv_names.itervalues():
7340 dev.logical_id = new_logical_id
7341 self.cfg.SetDiskID(dev, self.instance.primary_node)
7343 self.cfg.Update(self.instance, feedback_fn)
7345 # and now perform the drbd attach
7346 self.lu.LogInfo("Attaching primary drbds to new secondary"
7347 " (standalone => connected)")
7348 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7349 self.new_node],
7350 self.node_secondary_ip,
7351 self.instance.disks,
7352 self.instance.name,
7353 False)
7354 for to_node, to_result in result.items():
7355 msg = to_result.fail_msg
7357 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7359 hint=("please do a gnt-instance info to see the"
7360 " status of disks"))
7361 cstep = 5
7362 if self.early_release:
7363 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7364 cstep += 1
7365 self._RemoveOldStorage(self.target_node, iv_names)
7366 # WARNING: we release all node locks here, do not do other RPCs
7367 # than WaitForSync to the primary node
7368 self._ReleaseNodeLock([self.instance.primary_node,
7369 self.target_node,
7370 self.new_node])
7372 # Wait for sync
7373 # This can fail as the old devices are degraded and _WaitForSync
7374 # does a combined result over all disks, so we don't check its return value
7375 self.lu.LogStep(cstep, steps_total, "Sync devices")
7376 cstep += 1
7377 _WaitForSync(self.lu, self.instance)
7379 # Check all devices manually
7380 self._CheckDevices(self.instance.primary_node, iv_names)
7382 # Step: remove old storage
7383 if not self.early_release:
7384 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7385 self._RemoveOldStorage(self.target_node, iv_names)
7388 class LURepairNodeStorage(NoHooksLU):
7389 """Repairs the volume group on a node.
7392 _OP_REQP = ["node_name"]
7395 def CheckArguments(self):
7396 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7398 def ExpandNames(self):
7399 self.needed_locks = {
7400 locking.LEVEL_NODE: [self.op.node_name],
7401 }
7403 def _CheckFaultyDisks(self, instance, node_name):
7404 """Ensure faulty disks abort the opcode or at least warn."""
7405 try:
7406 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7407 node_name, True):
7408 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7409 " node '%s'" % (instance.name, node_name),
7411 except errors.OpPrereqError, err:
7412 if self.op.ignore_consistency:
7413 self.proc.LogWarning(str(err.args[0]))
7414 else:
7415 raise
7417 def CheckPrereq(self):
7418 """Check prerequisites.
7421 storage_type = self.op.storage_type
7423 if (constants.SO_FIX_CONSISTENCY not in
7424 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7425 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7426 " repaired" % storage_type,
7429 # Check whether any instance on this node has faulty disks
7430 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7431 if not inst.admin_up:
7432 continue
7433 check_nodes = set(inst.all_nodes)
7434 check_nodes.discard(self.op.node_name)
7435 for inst_node_name in check_nodes:
7436 self._CheckFaultyDisks(inst, inst_node_name)
7438 def Exec(self, feedback_fn):
7439 feedback_fn("Repairing storage unit '%s' on %s ..." %
7440 (self.op.name, self.op.node_name))
7442 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7443 result = self.rpc.call_storage_execute(self.op.node_name,
7444 self.op.storage_type, st_args,
7445 self.op.name,
7446 constants.SO_FIX_CONSISTENCY)
7447 result.Raise("Failed to repair storage unit '%s' on %s" %
7448 (self.op.name, self.op.node_name))
7451 class LUNodeEvacuationStrategy(NoHooksLU):
7452 """Computes the node evacuation strategy.
7455 _OP_REQP = ["nodes"]
7458 def CheckArguments(self):
7459 if not hasattr(self.op, "remote_node"):
7460 self.op.remote_node = None
7461 if not hasattr(self.op, "iallocator"):
7462 self.op.iallocator = None
7463 if self.op.remote_node is not None and self.op.iallocator is not None:
7464 raise errors.OpPrereqError("Give either the iallocator or the new"
7465 " secondary, not both", errors.ECODE_INVAL)
7467 def ExpandNames(self):
7468 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7469 self.needed_locks = locks = {}
7470 if self.op.remote_node is None:
7471 locks[locking.LEVEL_NODE] = locking.ALL_SET
7472 else:
7473 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7474 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7476 def CheckPrereq(self):
7477 pass
7479 def Exec(self, feedback_fn):
7480 if self.op.remote_node is not None:
7481 instances = []
7482 for node in self.op.nodes:
7483 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7484 result = []
7485 for i in instances:
7486 if i.primary_node == self.op.remote_node:
7487 raise errors.OpPrereqError("Node %s is the primary node of"
7488 " instance %s, cannot use it as"
7489 " secondary" %
7490 (self.op.remote_node, i.name),
7491 errors.ECODE_INVAL)
7492 result.append([i.name, self.op.remote_node])
7493 else:
7494 ial = IAllocator(self.cfg, self.rpc,
7495 mode=constants.IALLOCATOR_MODE_MEVAC,
7496 evac_nodes=self.op.nodes)
7497 ial.Run(self.op.iallocator, validate=True)
7499 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7505 class LUGrowDisk(LogicalUnit):
7506 """Grow a disk of an instance.
7508 """
7509 HPATH = "disk-grow"
7510 HTYPE = constants.HTYPE_INSTANCE
7511 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7514 def ExpandNames(self):
7515 self._ExpandAndLockInstance()
7516 self.needed_locks[locking.LEVEL_NODE] = []
7517 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7519 def DeclareLocks(self, level):
7520 if level == locking.LEVEL_NODE:
7521 self._LockInstancesNodes()
7523 def BuildHooksEnv(self):
7524 """Build hooks env.
7526 This runs on the master, the primary and all the secondaries.
7528 """
7529 env = {
7530 "DISK": self.op.disk,
7531 "AMOUNT": self.op.amount,
7532 }
7533 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7534 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7535 return env, nl, nl
7537 def CheckPrereq(self):
7538 """Check prerequisites.
7540 This checks that the instance is in the cluster.
7543 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7544 assert instance is not None, \
7545 "Cannot retrieve locked instance %s" % self.op.instance_name
7546 nodenames = list(instance.all_nodes)
7547 for node in nodenames:
7548 _CheckNodeOnline(self, node)
7551 self.instance = instance
7553 if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
7554 raise errors.OpPrereqError("Instance's disk layout does not support"
7555 " growing.", errors.ECODE_INVAL)
7557 self.disk = instance.FindDisk(self.op.disk)
7559 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7561 def Exec(self, feedback_fn):
7562 """Execute disk grow.
7565 instance = self.instance
7567 for node in instance.all_nodes:
7568 self.cfg.SetDiskID(disk, node)
7569 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7570 result.Raise("Grow request failed on node %s" % node)
7572 # TODO: Rewrite code to work properly
7573 # DRBD goes into sync mode for a short amount of time after executing the
7574 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7575 # calling "resize" in sync mode fails. Sleeping for a short amount of
7576 # time is a work-around.
7579 disk.RecordGrow(self.op.amount)
7580 self.cfg.Update(instance, feedback_fn)
7581 if self.op.wait_for_sync:
7582 disk_abort = not _WaitForSync(self, instance)
7584 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7585 " status.\nPlease check the instance.")
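# Usage sketch (not part of the original code): this LU backs the
# "gnt-instance grow-disk" command, e.g.
#
#   gnt-instance grow-disk inst1.example.com 0 2048
#
# which grows disk index 0 of inst1.example.com; the wait_for_sync opcode
# parameter decides whether we block until the disks have resynced. The
# mebibyte unit for the amount is an assumption based on how disk sizes are
# handled elsewhere in this module.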
7588 class LUQueryInstanceData(NoHooksLU):
7589 """Query runtime instance data.
7592 _OP_REQP = ["instances", "static"]
7595 def ExpandNames(self):
7596 self.needed_locks = {}
7597 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7599 if not isinstance(self.op.instances, list):
7600 raise errors.OpPrereqError("Invalid argument type 'instances'",
7603 if self.op.instances:
7604 self.wanted_names = []
7605 for name in self.op.instances:
7606 full_name = _ExpandInstanceName(self.cfg, name)
7607 self.wanted_names.append(full_name)
7608 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7609 else:
7610 self.wanted_names = None
7611 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7613 self.needed_locks[locking.LEVEL_NODE] = []
7614 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7616 def DeclareLocks(self, level):
7617 if level == locking.LEVEL_NODE:
7618 self._LockInstancesNodes()
7620 def CheckPrereq(self):
7621 """Check prerequisites.
7623 This only checks the optional instance list against the existing names.
7626 if self.wanted_names is None:
7627 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7629 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7630 in self.wanted_names]
7633 def _ComputeBlockdevStatus(self, node, instance_name, dev):
7634 """Returns the status of a block device
7637 if self.op.static or not node:
7640 self.cfg.SetDiskID(dev, node)
7642 result = self.rpc.call_blockdev_find(node, dev)
7646 result.Raise("Can't compute disk status for %s" % instance_name)
7648 status = result.payload
7652 return (status.dev_path, status.major, status.minor,
7653 status.sync_percent, status.estimated_time,
7654 status.is_degraded, status.ldisk_status)
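# Illustrative example (hypothetical values; LDS_OKAY is the assumed name of
# the "healthy" ldisk constant): for a nearly-synced DRBD device the tuple
# above could look like
#
#   ("/dev/drbd0", 147, 0, 99.5, 20, False, constants.LDS_OKAY)
#
# i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
# ldisk_status); in the static case no RPC is made and the dynamic fields
# are not filled in.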
7656 def _ComputeDiskStatus(self, instance, snode, dev):
7657 """Compute block device status.
7660 if dev.dev_type in constants.LDS_DRBD:
7661 # we change the snode then (otherwise we use the one passed in)
7662 if dev.logical_id[0] == instance.primary_node:
7663 snode = dev.logical_id[1]
7665 snode = dev.logical_id[0]
7667 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7669 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7672 dev_children = [self._ComputeDiskStatus(instance, snode, child)
7673 for child in dev.children]
7678 "iv_name": dev.iv_name,
7679 "dev_type": dev.dev_type,
7680 "logical_id": dev.logical_id,
7681 "physical_id": dev.physical_id,
7682 "pstatus": dev_pstatus,
7683 "sstatus": dev_sstatus,
7684 "children": dev_children,
7691 def Exec(self, feedback_fn):
7692 """Gather and return data"""
7695 cluster = self.cfg.GetClusterInfo()
7697 for instance in self.wanted_instances:
7698 if not self.op.static:
7699 remote_info = self.rpc.call_instance_info(instance.primary_node,
7701 instance.hypervisor)
7702 remote_info.Raise("Error checking node %s" % instance.primary_node)
7703 remote_info = remote_info.payload
7704 if remote_info and "state" in remote_info:
7707 remote_state = "down"
7710 if instance.admin_up:
7713 config_state = "down"
7715 disks = [self._ComputeDiskStatus(instance, None, device)
7716 for device in instance.disks]
7719 "name": instance.name,
7720 "config_state": config_state,
7721 "run_state": remote_state,
7722 "pnode": instance.primary_node,
7723 "snodes": instance.secondary_nodes,
7725 # this happens to be the same format used for hooks
7726 "nics": _NICListToTuple(self, instance.nics),
7728 "hypervisor": instance.hypervisor,
7729 "network_port": instance.network_port,
7730 "hv_instance": instance.hvparams,
7731 "hv_actual": cluster.FillHV(instance, skip_globals=True),
7732 "be_instance": instance.beparams,
7733 "be_actual": cluster.FillBE(instance),
7734 "serial_no": instance.serial_no,
7735 "mtime": instance.mtime,
7736 "ctime": instance.ctime,
7737 "uuid": instance.uuid,
7740 result[instance.name] = idict
7745 class LUSetInstanceParams(LogicalUnit):
7746 """Modifies an instance's parameters.
7749 HPATH = "instance-modify"
7750 HTYPE = constants.HTYPE_INSTANCE
7751 _OP_REQP = ["instance_name"]
7754 def CheckArguments(self):
7755 if not hasattr(self.op, 'nics'):
7757 if not hasattr(self.op, 'disks'):
7759 if not hasattr(self.op, 'beparams'):
7760 self.op.beparams = {}
7761 if not hasattr(self.op, 'hvparams'):
7762 self.op.hvparams = {}
7763 self.op.force = getattr(self.op, "force", False)
7764 if not (self.op.nics or self.op.disks or
7765 self.op.hvparams or self.op.beparams):
7766 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
7768 if self.op.hvparams:
7769 _CheckGlobalHvParams(self.op.hvparams)
7773 for disk_op, disk_dict in self.op.disks:
7774 if disk_op == constants.DDM_REMOVE:
7777 elif disk_op == constants.DDM_ADD:
7780 if not isinstance(disk_op, int):
7781 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
7782 if not isinstance(disk_dict, dict):
7783 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7784 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7786 if disk_op == constants.DDM_ADD:
7787 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7788 if mode not in constants.DISK_ACCESS_SET:
7789 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
7791 size = disk_dict.get('size', None)
7793 raise errors.OpPrereqError("Required disk parameter size missing",
7797 except (TypeError, ValueError), err:
7798 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7799 str(err), errors.ECODE_INVAL)
7800 disk_dict['size'] = size
7802 # modification of disk
7803 if 'size' in disk_dict:
7804 raise errors.OpPrereqError("Disk size change not possible, use"
7805 " grow-disk", errors.ECODE_INVAL)
7807 if disk_addremove > 1:
7808 raise errors.OpPrereqError("Only one disk add or remove operation"
7809 " supported at a time", errors.ECODE_INVAL)
7813 for nic_op, nic_dict in self.op.nics:
7814 if nic_op == constants.DDM_REMOVE:
7817 elif nic_op == constants.DDM_ADD:
7820 if not isinstance(nic_op, int):
7821 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
7822 if not isinstance(nic_dict, dict):
7823 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7824 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7826 # nic_dict should be a dict
7827 nic_ip = nic_dict.get('ip', None)
7828 if nic_ip is not None:
7829 if nic_ip.lower() == constants.VALUE_NONE:
7830 nic_dict['ip'] = None
7832 if not utils.IsValidIP(nic_ip):
7833 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
7836 nic_bridge = nic_dict.get('bridge', None)
7837 nic_link = nic_dict.get('link', None)
7838 if nic_bridge and nic_link:
7839 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7840 " at the same time", errors.ECODE_INVAL)
7841 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7842 nic_dict['bridge'] = None
7843 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7844 nic_dict['link'] = None
7846 if nic_op == constants.DDM_ADD:
7847 nic_mac = nic_dict.get('mac', None)
7848 if nic_mac is None:
7849 nic_dict['mac'] = constants.VALUE_AUTO
7851 if 'mac' in nic_dict:
7852 nic_mac = nic_dict['mac']
7853 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7854 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
7856 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7857 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7858 " modifying an existing nic",
7861 if nic_addremove > 1:
7862 raise errors.OpPrereqError("Only one NIC add or remove operation"
7863 " supported at a time", errors.ECODE_INVAL)
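# Illustrative opcode input (a sketch, not from the original source): both
# self.op.disks and self.op.nics are lists of (op, params) pairs, where op
# is constants.DDM_ADD, constants.DDM_REMOVE or the integer index of the
# device to modify, e.g.:
#
#   disks=[(constants.DDM_ADD, {"size": 2048, "mode": constants.DISK_RDWR})]
#   disks=[(0, {"mode": constants.DISK_RDONLY})]  # make disk 0 read-only
#   nics=[(constants.DDM_REMOVE, {})]             # drop the last NIC
#   nics=[(1, {"mac": constants.VALUE_GENERATE})] # regenerate NIC 1's MAC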
7865 def ExpandNames(self):
7866 self._ExpandAndLockInstance()
7867 self.needed_locks[locking.LEVEL_NODE] = []
7868 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7870 def DeclareLocks(self, level):
7871 if level == locking.LEVEL_NODE:
7872 self._LockInstancesNodes()
7874 def BuildHooksEnv(self):
7877 This runs on the master, primary and secondaries.
7881 if constants.BE_MEMORY in self.be_new:
7882 args['memory'] = self.be_new[constants.BE_MEMORY]
7883 if constants.BE_VCPUS in self.be_new:
7884 args['vcpus'] = self.be_new[constants.BE_VCPUS]
7885 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7886 # information at all.
7889 nic_override = dict(self.op.nics)
7890 c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7891 for idx, nic in enumerate(self.instance.nics):
7892 if idx in nic_override:
7893 this_nic_override = nic_override[idx]
7894 else:
7895 this_nic_override = {}
7896 if 'ip' in this_nic_override:
7897 ip = this_nic_override['ip']
7900 if 'mac' in this_nic_override:
7901 mac = this_nic_override['mac']
7904 if idx in self.nic_pnew:
7905 nicparams = self.nic_pnew[idx]
7906 else:
7907 nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7908 mode = nicparams[constants.NIC_MODE]
7909 link = nicparams[constants.NIC_LINK]
7910 args['nics'].append((ip, mac, mode, link))
7911 if constants.DDM_ADD in nic_override:
7912 ip = nic_override[constants.DDM_ADD].get('ip', None)
7913 mac = nic_override[constants.DDM_ADD]['mac']
7914 nicparams = self.nic_pnew[constants.DDM_ADD]
7915 mode = nicparams[constants.NIC_MODE]
7916 link = nicparams[constants.NIC_LINK]
7917 args['nics'].append((ip, mac, mode, link))
7918 elif constants.DDM_REMOVE in nic_override:
7919 del args['nics'][-1]
7921 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7922 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7926 def _GetUpdatedParams(old_params, update_dict,
7927 default_values, parameter_types):
7928 """Return the new params dict for the given params.
7930 @type old_params: dict
7931 @param old_params: old parameters
7932 @type update_dict: dict
7933 @param update_dict: dict containing new parameter values,
7934 or constants.VALUE_DEFAULT to reset the
7935 parameter to its default value
7936 @type default_values: dict
7937 @param default_values: default values for the filled parameters
7938 @type parameter_types: dict
7939 @param parameter_types: dict mapping target dict keys to types
7940 in constants.ENFORCEABLE_TYPES
7941 @rtype: (dict, dict)
7942 @return: (new_parameters, filled_parameters)
7945 params_copy = copy.deepcopy(old_params)
7946 for key, val in update_dict.iteritems():
7947 if val == constants.VALUE_DEFAULT:
7949 del params_copy[key]
7953 params_copy[key] = val
7954 utils.ForceDictType(params_copy, parameter_types)
7955 params_filled = objects.FillDict(default_values, params_copy)
7956 return (params_copy, params_filled)
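# Worked example (made-up values; type enforcement via ForceDictType not
# shown): with
#
#   old_params     = {"memory": 512, "vcpus": 2}
#   update_dict    = {"memory": constants.VALUE_DEFAULT, "auto_balance": True}
#   default_values = {"memory": 128, "vcpus": 1, "auto_balance": False}
#
# the returned pair is
#
#   new_parameters    == {"vcpus": 2, "auto_balance": True}
#   filled_parameters == {"memory": 128, "vcpus": 2, "auto_balance": True}
#
# i.e. VALUE_DEFAULT removes the key so the default shows through, while any
# other value overrides both the old and the default setting.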
7958 def CheckPrereq(self):
7959 """Check prerequisites.
7961 This checks that the instance exists and validates the requested
7962 parameter changes against the current cluster configuration.
7964 self.force = self.op.force
7966 # checking the new params on the primary/secondary nodes
7968 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7969 cluster = self.cluster = self.cfg.GetClusterInfo()
7970 assert self.instance is not None, \
7971 "Cannot retrieve locked instance %s" % self.op.instance_name
7972 pnode = instance.primary_node
7973 nodelist = list(instance.all_nodes)
7975 # hvparams processing
7976 if self.op.hvparams:
7977 i_hvdict, hv_new = self._GetUpdatedParams(
7978 instance.hvparams, self.op.hvparams,
7979 cluster.hvparams[instance.hypervisor],
7980 constants.HVS_PARAMETER_TYPES)
7982 hypervisor.GetHypervisor(
7983 instance.hypervisor).CheckParameterSyntax(hv_new)
7984 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
7985 self.hv_new = hv_new # the new actual values
7986 self.hv_inst = i_hvdict # the new dict (without defaults)
7987 else:
7988 self.hv_new = self.hv_inst = {}
7990 # beparams processing
7991 if self.op.beparams:
7992 i_bedict, be_new = self._GetUpdatedParams(
7993 instance.beparams, self.op.beparams,
7994 cluster.beparams[constants.PP_DEFAULT],
7995 constants.BES_PARAMETER_TYPES)
7996 self.be_new = be_new # the new actual values
7997 self.be_inst = i_bedict # the new dict (without defaults)
7998 else:
7999 self.be_new = self.be_inst = {}
8003 if constants.BE_MEMORY in self.op.beparams and not self.force:
8004 mem_check_list = [pnode]
8005 if be_new[constants.BE_AUTO_BALANCE]:
8006 # either we changed auto_balance to yes or it was from before
8007 mem_check_list.extend(instance.secondary_nodes)
8008 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8009 instance.hypervisor)
8010 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8011 instance.hypervisor)
8012 pninfo = nodeinfo[pnode]
8013 msg = pninfo.fail_msg
8015 # Assume the primary node is unreachable and go ahead
8016 self.warn.append("Can't get info from primary node %s: %s" %
8018 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8019 self.warn.append("Node data from primary node %s doesn't contain"
8020 " free memory information" % pnode)
8021 elif instance_info.fail_msg:
8022 self.warn.append("Can't get instance runtime information: %s" %
8023 instance_info.fail_msg)
8025 if instance_info.payload:
8026 current_mem = int(instance_info.payload['memory'])
8028 # Assume instance not running
8029 # (there is a slight race condition here, but it's not very probable,
8030 # and we have no other way to check)
8032 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8033 pninfo.payload['memory_free'])
8035 raise errors.OpPrereqError("This change will prevent the instance"
8036 " from starting, due to %d MB of memory"
8037 " missing on its primary node" % miss_mem,
8040 if be_new[constants.BE_AUTO_BALANCE]:
8041 for node, nres in nodeinfo.items():
8042 if node not in instance.secondary_nodes:
8046 self.warn.append("Can't get info from secondary node %s: %s" %
8048 elif not isinstance(nres.payload.get('memory_free', None), int):
8049 self.warn.append("Secondary node %s didn't return free"
8050 " memory information" % node)
8051 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8052 self.warn.append("Not enough memory to failover instance to"
8053 " secondary node %s" % node)
8058 for nic_op, nic_dict in self.op.nics:
8059 if nic_op == constants.DDM_REMOVE:
8060 if not instance.nics:
8061 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8064 if nic_op != constants.DDM_ADD:
8066 if not instance.nics:
8067 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8068 " no NICs" % nic_op,
8070 if nic_op < 0 or nic_op >= len(instance.nics):
8071 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8073 (nic_op, len(instance.nics) - 1),
8075 old_nic_params = instance.nics[nic_op].nicparams
8076 old_nic_ip = instance.nics[nic_op].ip
8081 update_params_dict = dict([(key, nic_dict[key])
8082 for key in constants.NICS_PARAMETERS
8083 if key in nic_dict])
8085 if 'bridge' in nic_dict:
8086 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8088 new_nic_params, new_filled_nic_params = \
8089 self._GetUpdatedParams(old_nic_params, update_params_dict,
8090 cluster.nicparams[constants.PP_DEFAULT],
8091 constants.NICS_PARAMETER_TYPES)
8092 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8093 self.nic_pinst[nic_op] = new_nic_params
8094 self.nic_pnew[nic_op] = new_filled_nic_params
8095 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8097 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8098 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8099 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8101 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8103 self.warn.append(msg)
8105 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8106 if new_nic_mode == constants.NIC_MODE_ROUTED:
8107 if 'ip' in nic_dict:
8108 nic_ip = nic_dict['ip']
8112 raise errors.OpPrereqError('Cannot set the nic ip to None'
8113 ' on a routed nic', errors.ECODE_INVAL)
8114 if 'mac' in nic_dict:
8115 nic_mac = nic_dict['mac']
8117 raise errors.OpPrereqError('Cannot set the nic mac to None',
8119 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8120 # otherwise generate the mac
8121 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8123 # or validate/reserve the current one
8125 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8126 except errors.ReservationError:
8127 raise errors.OpPrereqError("MAC address %s already in use"
8128 " in cluster" % nic_mac,
8129 errors.ECODE_NOTUNIQUE)
8132 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8133 raise errors.OpPrereqError("Disk operations not supported for"
8134 " diskless instances",
8136 for disk_op, _ in self.op.disks:
8137 if disk_op == constants.DDM_REMOVE:
8138 if len(instance.disks) == 1:
8139 raise errors.OpPrereqError("Cannot remove the last disk of"
8142 ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
8143 ins_l = ins_l[pnode]
8144 msg = ins_l.fail_msg
8146 raise errors.OpPrereqError("Can't contact node %s: %s" %
8147 (pnode, msg), errors.ECODE_ENVIRON)
8148 if instance.name in ins_l.payload:
8149 raise errors.OpPrereqError("Instance is running, can't remove"
8150 " disks.", errors.ECODE_STATE)
8152 if (disk_op == constants.DDM_ADD and
8153 len(instance.disks) >= constants.MAX_DISKS):
8154 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8155 " add more" % constants.MAX_DISKS,
8157 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8159 if disk_op < 0 or disk_op >= len(instance.disks):
8160 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8162 (disk_op, len(instance.disks)),
8167 def Exec(self, feedback_fn):
8168 """Modifies an instance.
8170 All parameters take effect only at the next restart of the instance.
8173 # Process here the warnings from CheckPrereq, as we don't have a
8174 # feedback_fn there.
8175 for warn in self.warn:
8176 feedback_fn("WARNING: %s" % warn)
8179 instance = self.instance
8181 for disk_op, disk_dict in self.op.disks:
8182 if disk_op == constants.DDM_REMOVE:
8183 # remove the last disk
8184 device = instance.disks.pop()
8185 device_idx = len(instance.disks)
8186 for node, disk in device.ComputeNodeTree(instance.primary_node):
8187 self.cfg.SetDiskID(disk, node)
8188 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8190 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8191 " continuing anyway", device_idx, node, msg)
8192 result.append(("disk/%d" % device_idx, "remove"))
8193 elif disk_op == constants.DDM_ADD:
8195 if instance.disk_template == constants.DT_FILE:
8196 file_driver, file_path = instance.disks[0].logical_id
8197 file_path = os.path.dirname(file_path)
8198 else:
8199 file_driver = file_path = None
8200 disk_idx_base = len(instance.disks)
8201 new_disk = _GenerateDiskTemplate(self,
8202 instance.disk_template,
8203 instance.name, instance.primary_node,
8204 instance.secondary_nodes,
8209 instance.disks.append(new_disk)
8210 info = _GetInstanceInfoText(instance)
8212 logging.info("Creating volume %s for instance %s",
8213 new_disk.iv_name, instance.name)
8214 # Note: this needs to be kept in sync with _CreateDisks
8216 for node in instance.all_nodes:
8217 f_create = node == instance.primary_node
8219 _CreateBlockDev(self, node, instance, new_disk,
8220 f_create, info, f_create)
8221 except errors.OpExecError, err:
8222 self.LogWarning("Failed to create volume %s (%s) on"
8224 new_disk.iv_name, new_disk, node, err)
8225 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8226 (new_disk.size, new_disk.mode)))
8228 # change a given disk
8229 instance.disks[disk_op].mode = disk_dict['mode']
8230 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8232 for nic_op, nic_dict in self.op.nics:
8233 if nic_op == constants.DDM_REMOVE:
8234 # remove the last nic
8235 del instance.nics[-1]
8236 result.append(("nic.%d" % len(instance.nics), "remove"))
8237 elif nic_op == constants.DDM_ADD:
8238 # mac and bridge should be set by now
8239 mac = nic_dict['mac']
8240 ip = nic_dict.get('ip', None)
8241 nicparams = self.nic_pinst[constants.DDM_ADD]
8242 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8243 instance.nics.append(new_nic)
8244 result.append(("nic.%d" % (len(instance.nics) - 1),
8245 "add:mac=%s,ip=%s,mode=%s,link=%s" %
8246 (new_nic.mac, new_nic.ip,
8247 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8248 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8251 for key in 'mac', 'ip':
8253 setattr(instance.nics[nic_op], key, nic_dict[key])
8254 if nic_op in self.nic_pinst:
8255 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8256 for key, val in nic_dict.iteritems():
8257 result.append(("nic.%s/%d" % (key, nic_op), val))
8260 if self.op.hvparams:
8261 instance.hvparams = self.hv_inst
8262 for key, val in self.op.hvparams.iteritems():
8263 result.append(("hv/%s" % key, val))
8266 if self.op.beparams:
8267 instance.beparams = self.be_inst
8268 for key, val in self.op.beparams.iteritems():
8269 result.append(("be/%s" % key, val))
8271 self.cfg.Update(instance, feedback_fn)
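# Illustrative note (not from the original source): the accumulated result
# is a list of (parameter, new value) pairs handed back to the caller, e.g.
#
#   [("disk/1", "add:size=2048,mode=rw"),
#    ("nic.mac/0", "aa:00:00:35:6a:00"),
#    ("be/memory", 512)]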
8276 class LUQueryExports(NoHooksLU):
8277 """Query the exports list
8280 _OP_REQP = ['nodes']
8283 def ExpandNames(self):
8284 self.needed_locks = {}
8285 self.share_locks[locking.LEVEL_NODE] = 1
8286 if not self.op.nodes:
8287 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8288 else:
8289 self.needed_locks[locking.LEVEL_NODE] = \
8290 _GetWantedNodes(self, self.op.nodes)
8292 def CheckPrereq(self):
8293 """Check prerequisites.
8296 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8298 def Exec(self, feedback_fn):
8299 """Compute the list of all the exported system images.
8302 @return: a dictionary with the structure node->(export-list)
8303 where export-list is a list of the instances exported on
8307 rpcresult = self.rpc.call_export_list(self.nodes)
8309 for node in rpcresult:
8310 if rpcresult[node].fail_msg:
8311 result[node] = False
8312 else:
8313 result[node] = rpcresult[node].payload
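# Illustrative return value (hypothetical names): a mapping from node name
# to its export list, or False for nodes that could not be queried, e.g.
#
#   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
#    "node2.example.com": False}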
8318 class LUExportInstance(LogicalUnit):
8319 """Export an instance to an image in the cluster.
8322 HPATH = "instance-export"
8323 HTYPE = constants.HTYPE_INSTANCE
8324 _OP_REQP = ["instance_name", "target_node", "shutdown"]
8327 def CheckArguments(self):
8328 """Check the arguments.
8331 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8332 constants.DEFAULT_SHUTDOWN_TIMEOUT)
8334 def ExpandNames(self):
8335 self._ExpandAndLockInstance()
8336 # FIXME: lock only instance primary and destination node
8338 # Sad but true: for now we have to lock all nodes, as we don't know where
8339 # the previous export might be, and in this LU we search for it and
8340 # remove it from its current node. In the future we could fix this by:
8341 # - making a tasklet to search (share-lock all), then create the new one,
8342 # then one to remove, after
8343 # - removing the removal operation altogether
8344 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8346 def DeclareLocks(self, level):
8347 """Last minute lock declaration."""
8348 # All nodes are locked anyway, so nothing to do here.
8350 def BuildHooksEnv(self):
8353 This will run on the master, primary node and target node.
8357 "EXPORT_NODE": self.op.target_node,
8358 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8359 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8361 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8362 nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8363 self.op.target_node]
8366 def CheckPrereq(self):
8367 """Check prerequisites.
8369 This checks that the instance and node names are valid.
8372 instance_name = self.op.instance_name
8373 self.instance = self.cfg.GetInstanceInfo(instance_name)
8374 assert self.instance is not None, \
8375 "Cannot retrieve locked instance %s" % self.op.instance_name
8376 _CheckNodeOnline(self, self.instance.primary_node)
8378 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8379 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8380 assert self.dst_node is not None
8382 _CheckNodeOnline(self, self.dst_node.name)
8383 _CheckNodeNotDrained(self, self.dst_node.name)
8385 # instance disk type verification
8386 for disk in self.instance.disks:
8387 if disk.dev_type == constants.LD_FILE:
8388 raise errors.OpPrereqError("Export not supported for instances with"
8389 " file-based disks", errors.ECODE_INVAL)
8391 def Exec(self, feedback_fn):
8392 """Export an instance to an image in the cluster.
8395 instance = self.instance
8396 dst_node = self.dst_node
8397 src_node = instance.primary_node
8399 if self.op.shutdown:
8400 # shutdown the instance, but not the disks
8401 feedback_fn("Shutting down instance %s" % instance.name)
8402 result = self.rpc.call_instance_shutdown(src_node, instance,
8403 self.shutdown_timeout)
8404 result.Raise("Could not shutdown instance %s on"
8405 " node %s" % (instance.name, src_node))
8407 vgname = self.cfg.GetVGName()
8411 # set the disk IDs correctly, since call_instance_start needs the
8412 # correct drbd minor to create the symlinks
8413 for disk in instance.disks:
8414 self.cfg.SetDiskID(disk, src_node)
8416 activate_disks = (not instance.admin_up)
8419 # Activate the instance disks if we're exporting a stopped instance
8420 feedback_fn("Activating disks for %s" % instance.name)
8421 _StartInstanceDisks(self, instance, None)
8427 for idx, disk in enumerate(instance.disks):
8428 feedback_fn("Creating a snapshot of disk/%s on node %s" %
8431 # result.payload will be a snapshot of an lvm leaf of the one we
8432 # passed
8433 result = self.rpc.call_blockdev_snapshot(src_node, disk)
8434 msg = result.fail_msg
8436 self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8438 snap_disks.append(False)
8440 disk_id = (vgname, result.payload)
8441 new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8442 logical_id=disk_id, physical_id=disk_id,
8443 iv_name=disk.iv_name)
8444 snap_disks.append(new_dev)
8447 if self.op.shutdown and instance.admin_up:
8448 feedback_fn("Starting instance %s" % instance.name)
8449 result = self.rpc.call_instance_start(src_node, instance, None, None)
8450 msg = result.fail_msg
8452 _ShutdownInstanceDisks(self, instance)
8453 raise errors.OpExecError("Could not start instance: %s" % msg)
8455 # TODO: check for size
8457 cluster_name = self.cfg.GetClusterName()
8458 for idx, dev in enumerate(snap_disks):
8459 feedback_fn("Exporting snapshot %s from %s to %s" %
8460 (idx, src_node, dst_node.name))
8462 # FIXME: pass debug from opcode to backend
8463 result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
8464 instance, cluster_name,
8465 idx, self.op.debug_level)
8466 msg = result.fail_msg
8468 self.LogWarning("Could not export disk/%s from node %s to"
8469 " node %s: %s", idx, src_node, dst_node.name, msg)
8470 dresults.append(False)
8471 else:
8472 dresults.append(True)
8473 msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
8475 self.LogWarning("Could not remove snapshot for disk/%d from node"
8476 " %s: %s", idx, src_node, msg)
8478 dresults.append(False)
8480 feedback_fn("Finalizing export on %s" % dst_node.name)
8481 result = self.rpc.call_finalize_export(dst_node.name, instance,
8484 msg = result.fail_msg
8486 self.LogWarning("Could not finalize export for instance %s"
8487 " on node %s: %s", instance.name, dst_node.name, msg)
8492 feedback_fn("Deactivating disks for %s" % instance.name)
8493 _ShutdownInstanceDisks(self, instance)
8495 nodelist = self.cfg.GetNodeList()
8496 nodelist.remove(dst_node.name)
8498 # on one-node clusters nodelist will be empty after the removal
8499 # if we proceed, the backup would be removed because OpQueryExports
8500 # substitutes an empty list with the full cluster node list.
8501 iname = instance.name
8503 feedback_fn("Removing old exports for instance %s" % iname)
8504 exportlist = self.rpc.call_export_list(nodelist)
8505 for node in exportlist:
8506 if exportlist[node].fail_msg:
8508 if iname in exportlist[node].payload:
8509 msg = self.rpc.call_export_remove(node, iname).fail_msg
8511 self.LogWarning("Could not remove older export for instance %s"
8512 " on node %s: %s", iname, node, msg)
8513 return fin_resu, dresults
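# Illustrative note (not from the original source): the returned pair is
# (finalization status, per-disk booleans), e.g. (True, [True, False]) for
# an export whose second disk could not be snapshotted or transferred.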
8516 class LURemoveExport(NoHooksLU):
8517 """Remove exports related to the named instance.
8520 _OP_REQP = ["instance_name"]
8523 def ExpandNames(self):
8524 self.needed_locks = {}
8525 # We need all nodes to be locked in order for RemoveExport to work, but we
8526 # don't need to lock the instance itself, as nothing will happen to it (and
8527 # we can also remove exports for an already-removed instance)
8528 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8530 def CheckPrereq(self):
8531 """Check prerequisites.
8535 def Exec(self, feedback_fn):
8536 """Remove any export.
8539 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
8540 # If the instance was not found, we'll try with the name that was passed in.
8541 # This will only work if it was an FQDN, though.
8542 fqdn_warn = False
8543 if not instance_name:
8544 fqdn_warn = True
8545 instance_name = self.op.instance_name
8547 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
8548 exportlist = self.rpc.call_export_list(locked_nodes)
8550 for node in exportlist:
8551 msg = exportlist[node].fail_msg
8553 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
8555 if instance_name in exportlist[node].payload:
8557 result = self.rpc.call_export_remove(node, instance_name)
8558 msg = result.fail_msg
8560 logging.error("Could not remove export for instance %s"
8561 " on node %s: %s", instance_name, node, msg)
8563 if fqdn_warn and not found:
8564 feedback_fn("Export not found. If trying to remove an export belonging"
8565 " to a deleted instance, please use its Fully Qualified"
8566 " Domain Name.")
8569 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
8572 This is an abstract class which is the parent of all the other tags LUs.
8576 def ExpandNames(self):
8577 self.needed_locks = {}
8578 if self.op.kind == constants.TAG_NODE:
8579 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
8580 self.needed_locks[locking.LEVEL_NODE] = self.op.name
8581 elif self.op.kind == constants.TAG_INSTANCE:
8582 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
8583 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
8585 def CheckPrereq(self):
8586 """Check prerequisites.
8589 if self.op.kind == constants.TAG_CLUSTER:
8590 self.target = self.cfg.GetClusterInfo()
8591 elif self.op.kind == constants.TAG_NODE:
8592 self.target = self.cfg.GetNodeInfo(self.op.name)
8593 elif self.op.kind == constants.TAG_INSTANCE:
8594 self.target = self.cfg.GetInstanceInfo(self.op.name)
8596 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
8597 str(self.op.kind), errors.ECODE_INVAL)
8600 class LUGetTags(TagsLU):
8601 """Returns the tags of a given object.
8604 _OP_REQP = ["kind", "name"]
8607 def Exec(self, feedback_fn):
8608 """Returns the tag list.
8611 return list(self.target.GetTags())
8614 class LUSearchTags(NoHooksLU):
8615 """Searches the tags for a given pattern.
8618 _OP_REQP = ["pattern"]
8621 def ExpandNames(self):
8622 self.needed_locks = {}
8624 def CheckPrereq(self):
8625 """Check prerequisites.
8627 This checks the pattern passed for validity by compiling it.
8631 self.re = re.compile(self.op.pattern)
8632 except re.error, err:
8633 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
8634 (self.op.pattern, err), errors.ECODE_INVAL)
8636 def Exec(self, feedback_fn):
8637 """Returns the tag list.
8641 tgts = [("/cluster", cfg.GetClusterInfo())]
8642 ilist = cfg.GetAllInstancesInfo().values()
8643 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
8644 nlist = cfg.GetAllNodesInfo().values()
8645 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
8647 for path, target in tgts:
8648 for tag in target.GetTags():
8649 if self.re.search(tag):
8650 results.append((path, tag))
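# Illustrative result (hypothetical names): a list of (path, tag) pairs for
# every tag matching the pattern, e.g.
#
#   [("/cluster", "production"),
#    ("/instances/inst1.example.com", "production")]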
8654 class LUAddTags(TagsLU):
8655 """Sets a tag on a given object.
8658 _OP_REQP = ["kind", "name", "tags"]
8661 def CheckPrereq(self):
8662 """Check prerequisites.
8664 This checks the type and length of the tag name and value.
8667 TagsLU.CheckPrereq(self)
8668 for tag in self.op.tags:
8669 objects.TaggableObject.ValidateTag(tag)
8671 def Exec(self, feedback_fn):
8676 for tag in self.op.tags:
8677 self.target.AddTag(tag)
8678 except errors.TagError, err:
8679 raise errors.OpExecError("Error while setting tag: %s" % str(err))
8680 self.cfg.Update(self.target, feedback_fn)
8683 class LUDelTags(TagsLU):
8684 """Delete a list of tags from a given object.
8687 _OP_REQP = ["kind", "name", "tags"]
8690 def CheckPrereq(self):
8691 """Check prerequisites.
8693 This checks that we have the given tag.
8696 TagsLU.CheckPrereq(self)
8697 for tag in self.op.tags:
8698 objects.TaggableObject.ValidateTag(tag)
8699 del_tags = frozenset(self.op.tags)
8700 cur_tags = self.target.GetTags()
8701 if not del_tags <= cur_tags:
8702 diff_tags = del_tags - cur_tags
8703 diff_names = ["'%s'" % tag for tag in diff_tags]
8705 raise errors.OpPrereqError("Tag(s) %s not found" %
8706 (",".join(diff_names)), errors.ECODE_NOENT)
8708 def Exec(self, feedback_fn):
8709 """Remove the tag from the object.
8712 for tag in self.op.tags:
8713 self.target.RemoveTag(tag)
8714 self.cfg.Update(self.target, feedback_fn)
8717 class LUTestDelay(NoHooksLU):
8718 """Sleep for a specified amount of time.
8720 This LU sleeps on the master and/or nodes for a specified amount of
8721 time.
8724 _OP_REQP = ["duration", "on_master", "on_nodes"]
8727 def ExpandNames(self):
8728 """Expand names and set required locks.
8730 This expands the node list, if any.
8733 self.needed_locks = {}
8734 if self.op.on_nodes:
8735 # _GetWantedNodes can be used here, but is not always appropriate to use
8736 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
8737 # more information.
8738 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
8739 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
8741 def CheckPrereq(self):
8742 """Check prerequisites.
8746 def Exec(self, feedback_fn):
8747 """Do the actual sleep.
8750 if self.op.on_master:
8751 if not utils.TestDelay(self.op.duration):
8752 raise errors.OpExecError("Error during master delay test")
8753 if self.op.on_nodes:
8754 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
8755 for node, node_result in result.items():
8756 node_result.Raise("Failure during rpc call to node %s" % node)
8759 class IAllocator(object):
8760 """IAllocator framework.
8762 An IAllocator instance has four sets of attributes:
8763 - cfg that is needed to query the cluster
8764 - input data (all members of the _KEYS class attribute are required)
8765 - four buffer attributes (in_data, in_text, out_data, out_text) that
8766 hold the input (to the external script) in data structure and text
8767 format, and the output from it, again in both formats
8768 - the result variables from the script (success, info, result) for
8772 # pylint: disable-msg=R0902
8773 # lots of instance attributes
8775 "name", "mem_size", "disks", "disk_template",
8776 "os", "tags", "nics", "vcpus", "hypervisor",
8779 "name", "relocate_from",
8785 def __init__(self, cfg, rpc, mode, **kwargs):
8788 # init buffer variables
8789 self.in_text = self.out_text = self.in_data = self.out_data = None
8790 # init all input fields so that pylint is happy
8792 self.mem_size = self.disks = self.disk_template = None
8793 self.os = self.tags = self.nics = self.vcpus = None
8794 self.hypervisor = None
8795 self.relocate_from = None
8797 self.evac_nodes = None
8799 self.required_nodes = None
8800 # init result fields
8801 self.success = self.info = self.result = None
8802 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8803 keyset = self._ALLO_KEYS
8804 fn = self._AddNewInstance
8805 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8806 keyset = self._RELO_KEYS
8807 fn = self._AddRelocateInstance
8808 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
8809 keyset = self._EVAC_KEYS
8810 fn = self._AddEvacuateNodes
8812 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8813 " IAllocator" % self.mode)
8814 for key in kwargs:
8815 if key not in keyset:
8816 raise errors.ProgrammerError("Invalid input parameter '%s' to"
8817 " IAllocator" % key)
8818 setattr(self, key, kwargs[key])
8820 for key in keyset:
8821 if key not in kwargs:
8822 raise errors.ProgrammerError("Missing input parameter '%s' to"
8823 " IAllocator" % key)
8824 self._BuildInputData(fn)
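# Illustrative construction (a sketch; "hail" is just an example allocator
# name): the keyword arguments must exactly match the keyset of the chosen
# mode, e.g. for a relocation request (_RELO_KEYS):
#
#   ial = IAllocator(self.cfg, self.rpc,
#                    mode=constants.IALLOCATOR_MODE_RELOC,
#                    name="inst1.example.com",
#                    relocate_from=["node2.example.com"])
#   ial.Run("hail", validate=True)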
8826 def _ComputeClusterData(self):
8827 """Compute the generic allocator input data.
8829 This is the data that is independent of the actual operation.
8833 cluster_info = cfg.GetClusterInfo()
8836 "version": constants.IALLOCATOR_VERSION,
8837 "cluster_name": cfg.GetClusterName(),
8838 "cluster_tags": list(cluster_info.GetTags()),
8839 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8840 # we don't have job IDs
8842 iinfo = cfg.GetAllInstancesInfo().values()
8843 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8847 node_list = cfg.GetNodeList()
8849 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8850 hypervisor_name = self.hypervisor
8851 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8852 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
8853 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
8854 hypervisor_name = cluster_info.enabled_hypervisors[0]
8856 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
8859 self.rpc.call_all_instances_info(node_list,
8860 cluster_info.enabled_hypervisors)
8861 for nname, nresult in node_data.items():
8862 # first fill in static (config-based) values
8863 ninfo = cfg.GetNodeInfo(nname)
8865 "tags": list(ninfo.GetTags()),
8866 "primary_ip": ninfo.primary_ip,
8867 "secondary_ip": ninfo.secondary_ip,
8868 "offline": ninfo.offline,
8869 "drained": ninfo.drained,
8870 "master_candidate": ninfo.master_candidate,
8873 if not (ninfo.offline or ninfo.drained):
8874 nresult.Raise("Can't get data for node %s" % nname)
8875 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
8877 remote_info = nresult.payload
8879 for attr in ['memory_total', 'memory_free', 'memory_dom0',
8880 'vg_size', 'vg_free', 'cpu_total']:
8881 if attr not in remote_info:
8882 raise errors.OpExecError("Node '%s' didn't return attribute"
8883 " '%s'" % (nname, attr))
8884 if not isinstance(remote_info[attr], int):
8885 raise errors.OpExecError("Node '%s' returned invalid value"
8887 (nname, attr, remote_info[attr]))
8888 # compute memory used by primary instances
8889 i_p_mem = i_p_up_mem = 0
8890 for iinfo, beinfo in i_list:
8891 if iinfo.primary_node == nname:
8892 i_p_mem += beinfo[constants.BE_MEMORY]
8893 if iinfo.name not in node_iinfo[nname].payload:
8896 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
8897 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
8898 remote_info['memory_free'] -= max(0, i_mem_diff)
8901 i_p_up_mem += beinfo[constants.BE_MEMORY]
8903 # compute memory used by instances
8905 "total_memory": remote_info['memory_total'],
8906 "reserved_memory": remote_info['memory_dom0'],
8907 "free_memory": remote_info['memory_free'],
8908 "total_disk": remote_info['vg_size'],
8909 "free_disk": remote_info['vg_free'],
8910 "total_cpus": remote_info['cpu_total'],
8911 "i_pri_memory": i_p_mem,
8912 "i_pri_up_memory": i_p_up_mem,
8916 node_results[nname] = pnr
8917 data["nodes"] = node_results
8921 for iinfo, beinfo in i_list:
8923 for nic in iinfo.nics:
8924 filled_params = objects.FillDict(
8925 cluster_info.nicparams[constants.PP_DEFAULT],
8927 nic_dict = {"mac": nic.mac,
8929 "mode": filled_params[constants.NIC_MODE],
8930 "link": filled_params[constants.NIC_LINK],
8932 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
8933 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
8934 nic_data.append(nic_dict)
8936 "tags": list(iinfo.GetTags()),
8937 "admin_up": iinfo.admin_up,
8938 "vcpus": beinfo[constants.BE_VCPUS],
8939 "memory": beinfo[constants.BE_MEMORY],
8941 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
8943 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
8944 "disk_template": iinfo.disk_template,
8945 "hypervisor": iinfo.hypervisor,
8947 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
8949 instance_data[iinfo.name] = pir
8951 data["instances"] = instance_data
8955 def _AddNewInstance(self):
8956 """Add new instance data to allocator structure.
8958 This in combination with _ComputeClusterData will create the
8959 correct structure needed as input for the allocator.
8961 The checks for the completeness of the opcode must have already been
8962 done.
8965 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
8967 if self.disk_template in constants.DTS_NET_MIRROR:
8968 self.required_nodes = 2
8970 self.required_nodes = 1
8973 "disk_template": self.disk_template,
8976 "vcpus": self.vcpus,
8977 "memory": self.mem_size,
8978 "disks": self.disks,
8979 "disk_space_total": disk_space,
8981 "required_nodes": self.required_nodes,
8985 def _AddRelocateInstance(self):
8986 """Add relocate instance data to allocator structure.
8988 This in combination with _ComputeClusterData will create the
8989 correct structure needed as input for the allocator.
8991 The checks for the completeness of the opcode must have already been
8992 done.
8995 instance = self.cfg.GetInstanceInfo(self.name)
8996 if instance is None:
8997 raise errors.ProgrammerError("Unknown instance '%s' passed to"
8998 " IAllocator" % self.name)
9000 if instance.disk_template not in constants.DTS_NET_MIRROR:
9001 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9004 if len(instance.secondary_nodes) != 1:
9005 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
9008 self.required_nodes = 1
9009 disk_sizes = [{'size': disk.size} for disk in instance.disks]
9010 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9014 "disk_space_total": disk_space,
9015 "required_nodes": self.required_nodes,
9016 "relocate_from": self.relocate_from,
9020 def _AddEvacuateNodes(self):
9021 """Add evacuate nodes data to allocator structure.
9025 "evac_nodes": self.evac_nodes
9029 def _BuildInputData(self, fn):
9030 """Build input data structures.
9033 self._ComputeClusterData()
9036 request["type"] = self.mode
9037 self.in_data["request"] = request
9039 self.in_text = serializer.Dump(self.in_data)
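# Illustrative sketch of the resulting self.in_data layout (abridged,
# hypothetical values):
#
#   {"version": constants.IALLOCATOR_VERSION,
#    "cluster_name": "cluster.example.com",
#    "cluster_tags": [],
#    "enabled_hypervisors": ["xen-pvm"],
#    "nodes": {"node1.example.com": {"total_memory": 4096, ...}},
#    "instances": {"inst1.example.com": {"memory": 512, ...}},
#    "request": {"type": self.mode, ...}}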
9041 def Run(self, name, validate=True, call_fn=None):
9042 """Run an instance allocator and return the results.
9046 call_fn = self.rpc.call_iallocator_runner
9048 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9049 result.Raise("Failure while running the iallocator script")
9051 self.out_text = result.payload
9053 self._ValidateResult()
9055 def _ValidateResult(self):
9056 """Process the allocator results.
9058 This will process the allocator output and, if successful, save the
9059 result in self.out_data and the other result attributes.
9063 rdict = serializer.Load(self.out_text)
9064 except Exception, err:
9065 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9067 if not isinstance(rdict, dict):
9068 raise errors.OpExecError("Can't parse iallocator results: not a dict")
9070 # TODO: remove backwards compatibility in later versions
9071 if "nodes" in rdict and "result" not in rdict:
9072 rdict["result"] = rdict["nodes"]
9075 for key in "success", "info", "result":
9076 if key not in rdict:
9077 raise errors.OpExecError("Can't parse iallocator results:"
9078 " missing key '%s'" % key)
9079 setattr(self, key, rdict[key])
9081 if not isinstance(rdict["result"], list):
9082 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9083 " is not a list")
9084 self.out_data = rdict
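# Illustrative reply (a sketch): the text returned by the allocator script
# is expected to deserialize into something like
#
#   {"success": true, "info": "allocation successful",
#    "result": ["node2.example.com"]}
#
# with a top-level "nodes" key still accepted in place of "result" for
# backwards compatibility, as handled above.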
9087 class LUTestAllocator(NoHooksLU):
9088 """Run allocator tests.
9090 This LU runs the allocator tests.
9093 _OP_REQP = ["direction", "mode", "name"]
9095 def CheckPrereq(self):
9096 """Check prerequisites.
9098 This checks the opcode parameters depending on the test direction and mode.
9101 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9102 for attr in ["name", "mem_size", "disks", "disk_template",
9103 "os", "tags", "nics", "vcpus"]:
9104 if not hasattr(self.op, attr):
9105 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
9106 attr, errors.ECODE_INVAL)
9107 iname = self.cfg.ExpandInstanceName(self.op.name)
9108 if iname is not None:
9109 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
9110 iname, errors.ECODE_EXISTS)
9111 if not isinstance(self.op.nics, list):
9112 raise errors.OpPrereqError("Invalid parameter 'nics'",
9114 for row in self.op.nics:
9115 if (not isinstance(row, dict) or
9118 "bridge" not in row):
9119 raise errors.OpPrereqError("Invalid contents of the 'nics'"
9120 " parameter", errors.ECODE_INVAL)
9121 if not isinstance(self.op.disks, list):
9122 raise errors.OpPrereqError("Invalid parameter 'disks'",
9124 for row in self.op.disks:
9125 if (not isinstance(row, dict) or
9126 "size" not in row or
9127 not isinstance(row["size"], int) or
9128 "mode" not in row or
9129 row["mode"] not in ['r', 'w']):
9130 raise errors.OpPrereqError("Invalid contents of the 'disks'"
9131 " parameter", errors.ECODE_INVAL)
9132 if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
9133 self.op.hypervisor = self.cfg.GetHypervisorType()
9134 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9135 if not hasattr(self.op, "name"):
9136 raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
9138 fname = _ExpandInstanceName(self.cfg, self.op.name)
9139 self.op.name = fname
9140 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
9141 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9142 if not hasattr(self.op, "evac_nodes"):
9143 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
9144 " opcode input", errors.ECODE_INVAL)
9146 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
9147 self.op.mode, errors.ECODE_INVAL)
9149 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
9150 if not hasattr(self.op, "allocator") or self.op.allocator is None:
9151 raise errors.OpPrereqError("Missing allocator name",
9153 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
9154 raise errors.OpPrereqError("Wrong allocator test '%s'" %
9155 self.op.direction, errors.ECODE_INVAL)
9157 def Exec(self, feedback_fn):
9158 """Run the allocator test.
9161 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9162 ial = IAllocator(self.cfg, self.rpc,
9165 mem_size=self.op.mem_size,
9166 disks=self.op.disks,
9167 disk_template=self.op.disk_template,
9171 vcpus=self.op.vcpus,
9172 hypervisor=self.op.hypervisor,
9174 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9175 ial = IAllocator(self.cfg, self.rpc,
9178 relocate_from=list(self.relocate_from),
9180 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9181 ial = IAllocator(self.cfg, self.rpc,
9183 evac_nodes=self.op.evac_nodes)
9185 raise errors.ProgrammerError("Unhandled mode %s in"
9186 " LUTestAllocator.Exec", self.op.mode)
9188 if self.op.direction == constants.IALLOCATOR_DIR_IN:
9189 result = ial.in_text
9190 else:
9191 ial.Run(self.op.allocator, validate=False)
9192 result = ial.out_text