# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar

import logging
import re
import time

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()
116 """Returns the SshRunner object
120 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
123 ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.
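
    For example (an illustrative sketch; the ``force`` parameter is
    hypothetical, not one defined by this module)::

      def CheckArguments(self):
        if not hasattr(self.op, "force"):
          self.op.force = False
        if not isinstance(self.op.force, bool):
          raise errors.OpPrereqError("Invalid 'force' parameter",
                                     errors.ECODE_INVAL)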

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.
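
    For example (sketch)::

      self.share_locks[locking.LEVEL_NODE] = 1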

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }

      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS
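
    A typical override (an illustrative sketch) recalculates node locks once
    the instance locks are held::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()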

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.
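
    For example (an illustrative sketch; it assumes self.op.instance_name was
    expanded in ExpandNames)::

      def CheckPrereq(self):
        self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
        if self.instance is None:
          raise errors.OpPrereqError("Instance '%s' not known" %
                                     self.op.instance_name,
                                     errors.ECODE_NOENT)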

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_' as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes to return, use an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.
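
    Example (an illustrative sketch for a cluster-level LU)::

      def BuildHooksEnv(self):
        env = {"OP_TARGET": self.cfg.GetClusterName()}
        mn = self.cfg.GetMasterNode()
        return env, [mn], [mn]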

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results
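
    For example (an illustrative sketch)::

      def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
        if phase == constants.HOOKS_PHASE_POST:
          feedback_fn("* Hooks have run")
        return lu_result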

    """
    # API must be kept, thus we ignore the unused-argument and "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.
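
    Intended use from a subclass (an illustrative sketch)::

      def ExpandNames(self):
        self._ExpandAndLockInstance()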

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we really are being called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
378 """Tasklet base class.
380 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
381 they can mix legacy code with tasklets. Locking needs to be done in the LU,
382 tasklets know nothing about locks.
384 Subclasses must follow these rules:
385 - Implement CheckPrereq
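
  A minimal tasklet might look like this (an illustrative sketch)::

    class _ExampleTasklet(Tasklet):
      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("Running example tasklet")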

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
                                 " non-empty list of nodes whose name is to be"
                                 " expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance
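
  For example, a bridged single-NIC instance yields, among others, keys like
  these (an illustrative sketch, values made up)::

    env = {
      "INSTANCE_NAME": "inst1.example.com",
      "INSTANCE_PRIMARY": "node1.example.com",
      "INSTANCE_NIC_COUNT": 1,
      "INSTANCE_NIC0_MODE": constants.NIC_MODE_BRIDGED,
      "INSTANCE_NIC0_BRIDGE": "xen-br0",
    }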
642 "INSTANCE_NAME": name,
643 "INSTANCE_PRIMARY": primary_node,
644 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
645 "INSTANCE_OS_TYPE": os_type,
646 "INSTANCE_STATUS": str_status,
647 "INSTANCE_MEMORY": memory,
648 "INSTANCE_VCPUS": vcpus,
649 "INSTANCE_DISK_TEMPLATE": disk_template,
650 "INSTANCE_HYPERVISOR": hypervisor_name,
654 nic_count = len(nics)
655 for idx, (ip, mac, mode, link) in enumerate(nics):
658 env["INSTANCE_NIC%d_IP" % idx] = ip
659 env["INSTANCE_NIC%d_MAC" % idx] = mac
660 env["INSTANCE_NIC%d_MODE" % idx] = mode
661 env["INSTANCE_NIC%d_LINK" % idx] = link
662 if mode == constants.NIC_MODE_BRIDGED:
663 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
667 env["INSTANCE_NIC_COUNT"] = nic_count
670 disk_count = len(disks)
671 for idx, (size, mode) in enumerate(disks):
672 env["INSTANCE_DISK%d_SIZE" % idx] = size
673 env["INSTANCE_DISK%d_MODE" % idx] = mode
677 env["INSTANCE_DISK_COUNT"] = disk_count
679 for source, kind in [(bep, "BE"), (hvp, "HV")]:
680 for key, value in source.items():
681 env["INSTANCE_%s_%s" % (kind, key)] = value


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
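
# For illustration (numbers made up): with candidate_pool_size = 3 and
# GetMasterCandidateStats reporting (mc_now, mc_should) = (2, 2), the new
# node bumps mc_should to min(2 + 1, 3) = 3; since 2 < 3, the node decides
# to promote itself.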


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                           profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
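
# Example usage of _CheckOSVariant (an illustrative sketch; "os_obj" is a
# hypothetical L{objects.OS} instance): for an OS named "debootstrap+default"
# the variant "default" must be in os_obj.supported_variants, while an OS
# without variant support passes unconditionally:
#
#   _CheckOSVariant(os_obj, "debootstrap+default")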


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _FormatTimestamp(secs):
  """Formats a Unix timestamp as UTC time.

  """
  return time.strftime("%F %T %Z", time.gmtime(secs))


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
  """Verifies certificate details for LUVerifyCluster.

  """
  if expired:
    msg = "Certificate %s is expired" % filename

    if not_before is not None and not_after is not None:
      msg += (" (valid from %s to %s)" %
              (_FormatTimestamp(not_before),
               _FormatTimestamp(not_after)))
    elif not_before is not None:
      msg += " (valid from %s)" % _FormatTimestamp(not_before)
    elif not_after is not None:
      msg += " (valid until %s)" % _FormatTimestamp(not_after)

    return (LUVerifyCluster.ETYPE_ERROR, msg)

  elif not_before is not None and not_before > now:
    return (LUVerifyCluster.ETYPE_WARNING,
            "Certificate %s not yet valid (valid from %s)" %
            (filename, _FormatTimestamp(not_before)))

  elif not_after is not None:
    remaining_days = int((not_after - now) / (24 * 3600))

    msg = ("Certificate %s expires in %d days" % (filename, remaining_days))

    if remaining_days <= error_days:
      return (LUVerifyCluster.ETYPE_ERROR, msg)

    if remaining_days <= warn_days:
      return (LUVerifyCluster.ETYPE_WARNING, msg)

  return (None, None)


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  # Depending on the pyOpenSSL version, this can just return (None, None)
  (not_before, not_after) = utils.GetX509CertValidity(cert)

  return _VerifyCertificateInner(filename, cert.has_expired(),
                                 not_before, not_after, time.time())


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + str(item)
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the used DRBD minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, node_result should be a non-empty dict
    test = not node_result or not isinstance(node_result, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      test = vglist is None
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      if not test:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # checks config file checksum
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if not test:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have = (file_name not in master_files) or node_is_mc

        test1 = file_name not in remote_cksum
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' missing", file_name)
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' has wrong checksum", file_name)
        # not candidate and this is not a must-have file
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist on non master"
                 " candidates (and the file is outdated)", file_name)
        # all good, except non-master/non-must have combination
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist"
                 " on non master candidates", file_name)

    # checks ssh to other nodes
    test = constants.NV_NODELIST not in node_result
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if node_result[constants.NV_NODELIST]:
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in node_result
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if node_result[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, node_result[constants.NV_NODENETTEST][anode])

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      test = not isinstance(used_minors, (tuple, list))
      _ErrorIf(test, self.ENODEDRBD, node,
               "cannot parse drbd status file: %s", str(used_minors))
      if not test:
        for minor, (iname, must_exist) in drbd_map.items():
          test = minor not in used_minors and must_exist
          _ErrorIf(test, self.ENODEDRBD, node,
                   "drbd minor %d of instance %s is not active",
                   minor, iname)
        for minor in used_minors:
          test = minor not in drbd_map
          _ErrorIf(test, self.ENODEDRBD, node,
                   "unallocated drbd minor %d is in use", minor)

    test = node_result.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    if vg_name is not None:
      pvlist = node_result.get(constants.NV_PVLIST, None)
      test = pvlist is None
      _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
      if not test:
        # check that ':' is not present in PV names, since it's a
        # special character for lvcreate (denotes the range of PEs to
        # use on the PVs)
        for _, pvname, owner_vg in pvlist:
          test = ":" in pvname
          _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                   " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        test = node not in node_vol_is or volume not in node_vol_is[node]
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      test = ((node_current not in node_instance or
               not instance in node_instance[node_current]) and
              node_current not in n_offline)
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node in node_instance:
      if (not node == node_current):
        test = instance in node_instance[node]
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node in node_vol_is:
      for volume in node_vol_is[node]:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_instance):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node in node_instance:
      for o_inst in node_instance[node]:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = nodeinfo['mfree'] < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should peer node %s fail", prinode)

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just run in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
    }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
    }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        test = instance not in instanceinfo
        _ErrorIf(test, self.ECLUSTERCFG, None,
                 "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
        if test:
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)

      self._VerifyNode(node_i, file_names, local_checksums,
                       nresult, master_files, node_drbd, vg_name)

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
                 utils.SafeEncode(lvdata))
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      test = not isinstance(idata, list)
      _ErrorIf(test, self.ENODEHV, node,
               "rpc call to node failed (instancelist): %s",
               utils.SafeEncode(str(idata)))
      if test:
        continue

      node_instance[node] = idata

      # node_info
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
      test = not isinstance(nodeinfo, dict)
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
      if test:
        continue

      # Node time
      ntime = nresult.get(constants.NV_TIME, None)
      try:
        ntime_merged = utils.MergeTime(ntime)
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
        continue

      if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
      elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
      else:
        ntime_diff = None

      _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
               "Node time diverges by at least %s from master node time",
               ntime_diff)

      if ntime_diff is not None:
        continue

      try:
        node_info[node] = {
          "mfree": int(nodeinfo['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary. this is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          test = (constants.NV_VGLIST not in nresult or
                  vg_name not in nresult[constants.NV_VGLIST])
          _ErrorIf(test, self.ENODELVM, node,
                   "node didn't return data for the volume group '%s'"
                   " - it is either missing or broken", vg_name)
          if test:
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check lvm/hypervisor")
        continue

    node_vol_should = {}

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_volume,
                           node_instance, n_offline)
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               self.EINSTANCELAYOUT, instance,
               "instance has multiple secondary nodes", code="WARNING")

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        _ErrorIf(snode not in node_info and snode not in n_offline,
                 self.ENODERPC, snode,
                 "instance %s, connection to secondary node"
                 " failed", instance)

        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)

        if snode in n_offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_volume)

    feedback_fn("* Verifying remaining instances")
    self._VerifyOrphanInstances(instancelist, node_instance)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_info, instance_cfg)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn(" - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn(" - NOTICE: %d drained node(s) found." % len(n_drained))

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub(' ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
      }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
      }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
1889 def Exec(self, feedback_fn):
1890 """Verify the size of cluster disks.
1893 # TODO: check child disks too
1894 # TODO: check differences in size between primary/secondary nodes
1896 for instance in self.wanted_instances:
1897 pnode = instance.primary_node
1898 if pnode not in per_node_disks:
1899 per_node_disks[pnode] = []
1900 for idx, disk in enumerate(instance.disks):
1901 per_node_disks[pnode].append((instance, idx, disk))
1904 for node, dskl in per_node_disks.items():
1905 newl = [v[2].Copy() for v in dskl]
1907 self.cfg.SetDiskID(dsk, node)
1908 result = self.rpc.call_blockdev_getsizes(node, newl)
1910 self.LogWarning("Failure in blockdev_getsizes call to node"
1911 " %s, ignoring", node)
1913 if len(result.data) != len(dskl):
1914 self.LogWarning("Invalid result from node %s, ignoring node results",
1917 for ((instance, idx, disk), size) in zip(dskl, result.data):
1919 self.LogWarning("Disk %d of instance %s did not return size"
1920 " information, ignoring", idx, instance.name)
1922 if not isinstance(size, (int, long)):
1923 self.LogWarning("Disk %d of instance %s did not return valid"
1924 " size information, ignoring", idx, instance.name)
1927 if size != disk.size:
1928 self.LogInfo("Disk %d of instance %s has mismatched size,"
1929 " correcting: recorded %d, actual %d", idx,
1930 instance.name, disk.size, size)
1932 self.cfg.Update(instance, feedback_fn)
1933 changed.append((instance.name, idx, size))
1934 if self._EnsureChildSizes(disk):
1935 self.cfg.Update(instance, feedback_fn)
1936 changed.append((instance.name, idx, disk.size))
1940 class LURenameCluster(LogicalUnit):
1941 """Rename the cluster.
1944 HPATH = "cluster-rename"
1945 HTYPE = constants.HTYPE_CLUSTER
1948 def BuildHooksEnv(self):
1953 "OP_TARGET": self.cfg.GetClusterName(),
1954 "NEW_NAME": self.op.name,
1956 mn = self.cfg.GetMasterNode()
1957 all_nodes = self.cfg.GetNodeList()
1958 return env, [mn], all_nodes
1960 def CheckPrereq(self):
1961 """Verify that the passed name is a valid one.
1964 hostname = utils.GetHostInfo(self.op.name)
1966 new_name = hostname.name
1967 self.ip = new_ip = hostname.ip
1968 old_name = self.cfg.GetClusterName()
1969 old_ip = self.cfg.GetMasterIP()
1970 if new_name == old_name and new_ip == old_ip:
1971 raise errors.OpPrereqError("Neither the name nor the IP address of the"
1972 " cluster has changed",
1974 if new_ip != old_ip:
1975 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1976 raise errors.OpPrereqError("The given cluster IP address (%s) is"
1977 " reachable on the network. Aborting." %
1978 new_ip, errors.ECODE_NOTUNIQUE)
1980 self.op.name = new_name
1982 def Exec(self, feedback_fn):
1983 """Rename the cluster.
1986 clustername = self.op.name
1989 # shutdown the master IP
1990 master = self.cfg.GetMasterNode()
1991 result = self.rpc.call_node_stop_master(master, False)
1992 result.Raise("Could not disable the master role")
1995 cluster = self.cfg.GetClusterInfo()
1996 cluster.cluster_name = clustername
1997 cluster.master_ip = ip
1998 self.cfg.Update(cluster, feedback_fn)
2000 # update the known hosts file
2001 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2002 node_list = self.cfg.GetNodeList()
2004 node_list.remove(master)
2007 result = self.rpc.call_upload_file(node_list,
2008 constants.SSH_KNOWN_HOSTS_FILE)
2009 for to_node, to_result in result.iteritems():
2010 msg = to_result.fail_msg
2012 msg = ("Copy of file %s to node %s failed: %s" %
2013 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2014 self.proc.LogWarning(msg)
2017 result = self.rpc.call_node_start_master(master, False, False)
2018 msg = result.fail_msg
2020 self.LogWarning("Could not re-enable the master role on"
2021 " the master, please restart manually: %s", msg)
2024 def _RecursiveCheckIfLVMBased(disk):
2025 """Check if the given disk or its children are lvm-based.
2027 @type disk: L{objects.Disk}
2028 @param disk: the disk to check
2030 @return: boolean indicating whether a LD_LV dev_type was found or not
2034 for chdisk in disk.children:
2035 if _RecursiveCheckIfLVMBased(chdisk):
2037 return disk.dev_type == constants.LD_LV
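
# Minimal usage sketch for _RecursiveCheckIfLVMBased, using a stand-in
# object with only the two attributes the function reads (dev_type and
# children); _FakeDisk is hypothetical test data, not a Ganeti class.
class _FakeDisk(object):
  def __init__(self, dev_type, children=None):
    self.dev_type = dev_type
    self.children = children or []

def _ExampleLvmCheck():
  # a DRBD8 device backed by two logical volumes: the recursion
  # descends into the children and reports True because they are LD_LV
  drbd = _FakeDisk(constants.LD_DRBD8,
                   children=[_FakeDisk(constants.LD_LV),
                             _FakeDisk(constants.LD_LV)])
  return _RecursiveCheckIfLVMBased(drbd)
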
2040 class LUSetClusterParams(LogicalUnit):
2041 """Change the parameters of the cluster.
2044 HPATH = "cluster-modify"
2045 HTYPE = constants.HTYPE_CLUSTER
2049 def CheckArguments(self):
2053 if not hasattr(self.op, "candidate_pool_size"):
2054 self.op.candidate_pool_size = None
2055 if self.op.candidate_pool_size is not None:
2057 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2058 except (ValueError, TypeError), err:
2059 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2060 str(err), errors.ECODE_INVAL)
2061 if self.op.candidate_pool_size < 1:
2062 raise errors.OpPrereqError("At least one master candidate needed",
2065 def ExpandNames(self):
2066 # FIXME: in the future maybe other cluster params won't require checking on
2067 # all nodes to be modified.
2068 self.needed_locks = {
2069 locking.LEVEL_NODE: locking.ALL_SET,
2071 self.share_locks[locking.LEVEL_NODE] = 1
2073 def BuildHooksEnv(self):
2078 "OP_TARGET": self.cfg.GetClusterName(),
2079 "NEW_VG_NAME": self.op.vg_name,
2081 mn = self.cfg.GetMasterNode()
2082 return env, [mn], [mn]
2084 def CheckPrereq(self):
2085 """Check prerequisites.
2087 This checks that the given parameters don't conflict and
2088 that the given volume group is valid.
2091 if self.op.vg_name is not None and not self.op.vg_name:
2092 instances = self.cfg.GetAllInstancesInfo().values()
2093 for inst in instances:
2094 for disk in inst.disks:
2095 if _RecursiveCheckIfLVMBased(disk):
2096 raise errors.OpPrereqError("Cannot disable lvm storage while"
2097 " lvm-based instances exist",
2100 node_list = self.acquired_locks[locking.LEVEL_NODE]
2102 # if vg_name is not None, check the given volume group on all nodes
2104 vglist = self.rpc.call_vg_list(node_list)
2105 for node in node_list:
2106 msg = vglist[node].fail_msg
2108 # ignoring down node
2109 self.LogWarning("Error while gathering data on node %s"
2110 " (ignoring node): %s", node, msg)
2112 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2114 constants.MIN_VG_SIZE)
2116 raise errors.OpPrereqError("Error on node '%s': %s" %
2117 (node, vgstatus), errors.ECODE_ENVIRON)
2119 self.cluster = cluster = self.cfg.GetClusterInfo()
2120 # validate params changes
2121 if self.op.beparams:
2122 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2123 self.new_beparams = objects.FillDict(
2124 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2126 if self.op.nicparams:
2127 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2128 self.new_nicparams = objects.FillDict(
2129 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2130 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2133 # check all instances for consistency
2134 for instance in self.cfg.GetAllInstancesInfo().values():
2135 for nic_idx, nic in enumerate(instance.nics):
2136 params_copy = copy.deepcopy(nic.nicparams)
2137 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2139 # check parameter syntax
2141 objects.NIC.CheckParameterSyntax(params_filled)
2142 except errors.ConfigurationError, err:
2143 nic_errors.append("Instance %s, nic/%d: %s" %
2144 (instance.name, nic_idx, err))
2146 # if we're moving instances to routed, check that they have an ip
2147 target_mode = params_filled[constants.NIC_MODE]
2148 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2149 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2150 (instance.name, nic_idx))
2152 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2153 "\n".join(nic_errors))
2155 # hypervisor list/parameters
2156 self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2157 if self.op.hvparams:
2158 if not isinstance(self.op.hvparams, dict):
2159 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2161 for hv_name, hv_dict in self.op.hvparams.items():
2162 if hv_name not in self.new_hvparams:
2163 self.new_hvparams[hv_name] = hv_dict
2165 self.new_hvparams[hv_name].update(hv_dict)
2167 # os hypervisor parameters
2168 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2170 if not isinstance(self.op.os_hvp, dict):
2171 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2173 for os_name, hvs in self.op.os_hvp.items():
2174 if not isinstance(hvs, dict):
2175 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2176 " input"), errors.ECODE_INVAL)
2177 if os_name not in self.new_os_hvp:
2178 self.new_os_hvp[os_name] = hvs
2180 for hv_name, hv_dict in hvs.items():
2181 if hv_name not in self.new_os_hvp[os_name]:
2182 self.new_os_hvp[os_name][hv_name] = hv_dict
2184 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2186 if self.op.enabled_hypervisors is not None:
2187 self.hv_list = self.op.enabled_hypervisors
2188 if not self.hv_list:
2189 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2190 " least one member",
2192 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2194 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2196 utils.CommaJoin(invalid_hvs),
2199 self.hv_list = cluster.enabled_hypervisors
2201 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2202 # either the enabled list has changed, or the parameters have, validate
2203 for hv_name, hv_params in self.new_hvparams.items():
2204 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2205 (self.op.enabled_hypervisors and
2206 hv_name in self.op.enabled_hypervisors)):
2207 # either this is a new hypervisor, or its parameters have changed
2208 hv_class = hypervisor.GetHypervisor(hv_name)
2209 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2210 hv_class.CheckParameterSyntax(hv_params)
2211 _CheckHVParams(self, node_list, hv_name, hv_params)
2214 # no need to check any newly-enabled hypervisors, since the
2215 # defaults have already been checked in the above code-block
2216 for os_name, os_hvp in self.new_os_hvp.items():
2217 for hv_name, hv_params in os_hvp.items():
2218 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2219 # we need to fill in the new os_hvp on top of the actual hv_p
2220 cluster_defaults = self.new_hvparams.get(hv_name, {})
2221 new_osp = objects.FillDict(cluster_defaults, hv_params)
2222 hv_class = hypervisor.GetHypervisor(hv_name)
2223 hv_class.CheckParameterSyntax(new_osp)
2224 _CheckHVParams(self, node_list, hv_name, new_osp)
2227 def Exec(self, feedback_fn):
2228 """Change the parameters of the cluster.
2231 if self.op.vg_name is not None:
2232 new_volume = self.op.vg_name
2235 if new_volume != self.cfg.GetVGName():
2236 self.cfg.SetVGName(new_volume)
2238 feedback_fn("Cluster LVM configuration already in desired"
2239 " state, not changing")
2240 if self.op.hvparams:
2241 self.cluster.hvparams = self.new_hvparams
2243 self.cluster.os_hvp = self.new_os_hvp
2244 if self.op.enabled_hypervisors is not None:
2245 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2246 if self.op.beparams:
2247 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2248 if self.op.nicparams:
2249 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2251 if self.op.candidate_pool_size is not None:
2252 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2253 # we need to update the pool size here, otherwise the save will fail
2254 _AdjustCandidatePool(self, [])
2256 self.cfg.Update(self.cluster, feedback_fn)
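
# Hedged sketch of the parameter layering used throughout
# LUSetClusterParams: objects.FillDict is assumed to copy the defaults
# and overlay the custom values on top, which the plain-dict version
# below reproduces.
def _ExampleFillDict(defaults, custom):
  """Return a copy of defaults updated with the custom values."""
  filled = defaults.copy()
  filled.update(custom)
  return filled

# e.g. cluster-level NIC defaults overlaid with a per-opcode change
# (hypothetical values):
#   _ExampleFillDict({"mode": "bridged", "link": "xen-br0"},
#                    {"mode": "routed"})
#   -> {"mode": "routed", "link": "xen-br0"}
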
2259 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2260 """Distribute additional files which are part of the cluster configuration.
2262 ConfigWriter takes care of distributing the config and ssconf files, but
2263 there are more files which should be distributed to all nodes. This function
2264 makes sure those are copied.
2266 @param lu: calling logical unit
2267 @param additional_nodes: list of nodes not in the config to distribute to
2270 # 1. Gather target nodes
2271 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2272 dist_nodes = lu.cfg.GetOnlineNodeList()
2273 if additional_nodes is not None:
2274 dist_nodes.extend(additional_nodes)
2275 if myself.name in dist_nodes:
2276 dist_nodes.remove(myself.name)
2278 # 2. Gather files to distribute
2279 dist_files = set([constants.ETC_HOSTS,
2280 constants.SSH_KNOWN_HOSTS_FILE,
2281 constants.RAPI_CERT_FILE,
2282 constants.RAPI_USERS_FILE,
2283 constants.HMAC_CLUSTER_KEY,
2286 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2287 for hv_name in enabled_hypervisors:
2288 hv_class = hypervisor.GetHypervisor(hv_name)
2289 dist_files.update(hv_class.GetAncillaryFiles())
2291 # 3. Perform the files upload
2292 for fname in dist_files:
2293 if os.path.exists(fname):
2294 result = lu.rpc.call_upload_file(dist_nodes, fname)
2295 for to_node, to_result in result.items():
2296 msg = to_result.fail_msg
2298 msg = ("Copy of file %s to node %s failed: %s" %
2299 (fname, to_node, msg))
2300 lu.proc.LogWarning(msg)
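
# Illustrative sketch of the distribution pattern above, with the RPC
# replaced by a hypothetical upload_fn callback: compute the target
# list (everyone online but ourselves), then push each file that
# actually exists locally.
def _ExampleDistributeFiles(my_name, online_nodes, files, upload_fn):
  dist_nodes = [name for name in online_nodes if name != my_name]
  for fname in files:
    if os.path.exists(fname):
      # nonexistent files (e.g. an unused RAPI users file) are simply
      # skipped rather than reported as errors
      upload_fn(dist_nodes, fname)
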
2303 class LURedistributeConfig(NoHooksLU):
2304 """Force the redistribution of cluster configuration.
2306 This is a very simple LU.
2312 def ExpandNames(self):
2313 self.needed_locks = {
2314 locking.LEVEL_NODE: locking.ALL_SET,
2316 self.share_locks[locking.LEVEL_NODE] = 1
2318 def CheckPrereq(self):
2319 """Check prerequisites.
2323 def Exec(self, feedback_fn):
2324 """Redistribute the configuration.
2327 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2328 _RedistributeAncillaryFiles(self)
2331 def _WaitForSync(lu, instance, oneshot=False):
2332 """Sleep and poll for an instance's disk to sync.
2335 if not instance.disks:
2339 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2341 node = instance.primary_node
2343 for dev in instance.disks:
2344 lu.cfg.SetDiskID(dev, node)
2346 # TODO: Convert to utils.Retry
2349 degr_retries = 10 # in seconds, as we sleep 1 second each time
2353 cumul_degraded = False
2354 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2355 msg = rstats.fail_msg
2357 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2360 raise errors.RemoteError("Can't contact node %s for mirror data,"
2361 " aborting." % node)
2364 rstats = rstats.payload
2366 for i, mstat in enumerate(rstats):
2368 lu.LogWarning("Can't compute data for node %s/%s",
2369 node, instance.disks[i].iv_name)
2372 cumul_degraded = (cumul_degraded or
2373 (mstat.is_degraded and mstat.sync_percent is None))
2374 if mstat.sync_percent is not None:
2376 if mstat.estimated_time is not None:
2377 rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2378 max_time = mstat.estimated_time
2380 rem_time = "no time estimate"
2381 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2382 (instance.disks[i].iv_name, mstat.sync_percent,
2385 # if we're done but degraded, let's do a few small retries, to
2386 # make sure we see a stable and not a transient situation; therefore
2387 # we force a restart of the loop
2388 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2389 logging.info("Degraded disks found, %d retries left", degr_retries)
2397 time.sleep(min(60, max_time))
2400 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2401 return not cumul_degraded
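
# A minimal sketch of the retry discipline in _WaitForSync, assuming a
# poll_fn that returns a (done, degraded) pair: a sync that looks
# finished but still degraded is polled a few more times, so that only
# a stable degraded state is reported to the caller.
def _ExampleWaitStable(poll_fn, degr_retries=10):
  while True:
    done, degraded = poll_fn()
    if done and degraded and degr_retries > 0:
      degr_retries -= 1
      time.sleep(1)
      continue
    if done:
      return not degraded
    time.sleep(1)
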
2404 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2405 """Check that mirrors are not degraded.
2407 The ldisk parameter, if True, will change the test from the
2408 is_degraded attribute (which represents overall non-ok status for
2409 the device(s)) to the ldisk_status one (representing the local storage status).
2412 lu.cfg.SetDiskID(dev, node)
2416 if on_primary or dev.AssembleOnSecondary():
2417 rstats = lu.rpc.call_blockdev_find(node, dev)
2418 msg = rstats.fail_msg
2420 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2422 elif not rstats.payload:
2423 lu.LogWarning("Can't find disk on node %s", node)
2427 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2429 result = result and not rstats.payload.is_degraded
2432 for child in dev.children:
2433 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2438 class LUDiagnoseOS(NoHooksLU):
2439 """Logical unit for OS diagnose/query.
2442 _OP_REQP = ["output_fields", "names"]
2444 _FIELDS_STATIC = utils.FieldSet()
2445 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2446 # Fields that need calculation of global os validity
2447 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2449 def ExpandNames(self):
2451 raise errors.OpPrereqError("Selective OS query not supported",
2454 _CheckOutputFields(static=self._FIELDS_STATIC,
2455 dynamic=self._FIELDS_DYNAMIC,
2456 selected=self.op.output_fields)
2458 # Lock all nodes, in shared mode
2459 # Temporary removal of locks, should be reverted later
2460 # TODO: reintroduce locks when they are lighter-weight
2461 self.needed_locks = {}
2462 #self.share_locks[locking.LEVEL_NODE] = 1
2463 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2465 def CheckPrereq(self):
2466 """Check prerequisites.
2471 def _DiagnoseByOS(rlist):
2472 """Remaps a per-node return list into an a per-os per-node dictionary
2474 @param rlist: a map with node names as keys and OS objects as values
2477 @return: a dictionary with osnames as keys and as value another map, with
2478 nodes as keys and tuples of (path, status, diagnose, variants) as values, eg::
2480 {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2481 (/srv/..., False, "invalid api")],
2482 "node2": [(/srv/..., True, "")]}
2487 # we build here the list of nodes that didn't fail the RPC (at RPC
2488 # level), so that nodes with a non-responding node daemon don't
2489 # make all OSes invalid
2490 good_nodes = [node_name for node_name in rlist
2491 if not rlist[node_name].fail_msg]
2492 for node_name, nr in rlist.items():
2493 if nr.fail_msg or not nr.payload:
2495 for name, path, status, diagnose, variants in nr.payload:
2496 if name not in all_os:
2497 # build a list of nodes for this os containing empty lists
2498 # for each node in node_list
2500 for nname in good_nodes:
2501 all_os[name][nname] = []
2502 all_os[name][node_name].append((path, status, diagnose, variants))
2505 def Exec(self, feedback_fn):
2506 """Compute the list of OSes.
2509 valid_nodes = self.cfg.GetOnlineNodeList()
2510 node_data = self.rpc.call_os_diagnose(valid_nodes)
2511 pol = self._DiagnoseByOS(node_data)
2513 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2514 calc_variants = "variants" in self.op.output_fields
2516 for os_name, os_data in pol.items():
2521 for osl in os_data.values():
2522 valid = valid and osl and osl[0][1]
2527 node_variants = osl[0][3]
2528 if variants is None:
2529 variants = node_variants
2531 variants = [v for v in variants if v in node_variants]
2533 for field in self.op.output_fields:
2536 elif field == "valid":
2538 elif field == "node_status":
2539 # this is just a copy of the dict
2541 for node_name, nos_list in os_data.items():
2542 val[node_name] = nos_list
2543 elif field == "variants":
2546 raise errors.ParameterError(field)
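
# Sketch of the per-node to per-OS remap performed by
# LUDiagnoseOS._DiagnoseByOS, on hypothetical payload data shaped like
# the (name, path, status, diagnose, variants) tuples it receives.
def _ExampleDiagnoseRemap():
  rlist = {
    "node1": [("debian-etch", "/srv/os", True, "", [])],
    "node2": [("debian-etch", "/srv/os", False, "invalid api", [])],
    }
  all_os = {}
  for node_name, payload in rlist.items():
    for name, path, status, diagnose, variants in payload:
      if name not in all_os:
        all_os[name] = dict((nname, []) for nname in rlist)
      all_os[name][node_name].append((path, status, diagnose, variants))
  # all_os["debian-etch"] now holds one entry per node, so a later pass
  # can compute global validity and the intersection of variants
  return all_os
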
2553 class LURemoveNode(LogicalUnit):
2554 """Logical unit for removing a node.
2557 HPATH = "node-remove"
2558 HTYPE = constants.HTYPE_NODE
2559 _OP_REQP = ["node_name"]
2561 def BuildHooksEnv(self):
2564 This doesn't run on the target node in the pre phase as a failed
2565 node would then be impossible to remove.
2569 "OP_TARGET": self.op.node_name,
2570 "NODE_NAME": self.op.node_name,
2572 all_nodes = self.cfg.GetNodeList()
2574 all_nodes.remove(self.op.node_name)
2576 logging.warning("Node %s which is about to be removed not found"
2577 " in the all nodes list", self.op.node_name)
2578 return env, all_nodes, all_nodes
2580 def CheckPrereq(self):
2581 """Check prerequisites.
2584 - the node exists in the configuration
2585 - it does not have primary or secondary instances
2586 - it's not the master
2588 Any errors are signaled by raising errors.OpPrereqError.
2591 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2592 node = self.cfg.GetNodeInfo(self.op.node_name)
2593 assert node is not None
2595 instance_list = self.cfg.GetInstanceList()
2597 masternode = self.cfg.GetMasterNode()
2598 if node.name == masternode:
2599 raise errors.OpPrereqError("Node is the master node,"
2600 " you need to fail over first.",
2603 for instance_name in instance_list:
2604 instance = self.cfg.GetInstanceInfo(instance_name)
2605 if node.name in instance.all_nodes:
2606 raise errors.OpPrereqError("Instance %s is still running on the node,"
2607 " please remove it first." % instance_name,
2609 self.op.node_name = node.name
2612 def Exec(self, feedback_fn):
2613 """Removes the node from the cluster.
2617 logging.info("Stopping the node daemon and removing configs from node %s",
2620 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2622 # Promote nodes to master candidate as needed
2623 _AdjustCandidatePool(self, exceptions=[node.name])
2624 self.context.RemoveNode(node.name)
2626 # Run post hooks on the node before it's removed
2627 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2629 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2631 # pylint: disable-msg=W0702
2632 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2634 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2635 msg = result.fail_msg
2637 self.LogWarning("Errors encountered on the remote node while leaving"
2638 " the cluster: %s", msg)
2641 class LUQueryNodes(NoHooksLU):
2642 """Logical unit for querying nodes.
2645 # pylint: disable-msg=W0142
2646 _OP_REQP = ["output_fields", "names", "use_locking"]
2649 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2650 "master_candidate", "offline", "drained"]
2652 _FIELDS_DYNAMIC = utils.FieldSet(
2654 "mtotal", "mnode", "mfree",
2656 "ctotal", "cnodes", "csockets",
2659 _FIELDS_STATIC = utils.FieldSet(*[
2660 "pinst_cnt", "sinst_cnt",
2661 "pinst_list", "sinst_list",
2662 "pip", "sip", "tags",
2664 "role"] + _SIMPLE_FIELDS
2667 def ExpandNames(self):
2668 _CheckOutputFields(static=self._FIELDS_STATIC,
2669 dynamic=self._FIELDS_DYNAMIC,
2670 selected=self.op.output_fields)
2672 self.needed_locks = {}
2673 self.share_locks[locking.LEVEL_NODE] = 1
2676 self.wanted = _GetWantedNodes(self, self.op.names)
2678 self.wanted = locking.ALL_SET
2680 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2681 self.do_locking = self.do_node_query and self.op.use_locking
2683 # if we don't request only static fields, we need to lock the nodes
2684 self.needed_locks[locking.LEVEL_NODE] = self.wanted
2686 def CheckPrereq(self):
2687 """Check prerequisites.
2690 # The validation of the node list is done in _GetWantedNodes if the
2691 # list is non-empty; if it's empty, there is no validation to do
2694 def Exec(self, feedback_fn):
2695 """Computes the list of nodes and their attributes.
2698 all_info = self.cfg.GetAllNodesInfo()
2700 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2701 elif self.wanted != locking.ALL_SET:
2702 nodenames = self.wanted
2703 missing = set(nodenames).difference(all_info.keys())
2705 raise errors.OpExecError(
2706 "Some nodes were removed before retrieving their data: %s" % missing)
2708 nodenames = all_info.keys()
2710 nodenames = utils.NiceSort(nodenames)
2711 nodelist = [all_info[name] for name in nodenames]
2713 # begin data gathering
2715 if self.do_node_query:
2717 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2718 self.cfg.GetHypervisorType())
2719 for name in nodenames:
2720 nodeinfo = node_data[name]
2721 if not nodeinfo.fail_msg and nodeinfo.payload:
2722 nodeinfo = nodeinfo.payload
2723 fn = utils.TryConvert
2725 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2726 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2727 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2728 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2729 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2730 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2731 "bootid": nodeinfo.get('bootid', None),
2732 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2733 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2736 live_data[name] = {}
2738 live_data = dict.fromkeys(nodenames, {})
2740 node_to_primary = dict([(name, set()) for name in nodenames])
2741 node_to_secondary = dict([(name, set()) for name in nodenames])
2743 inst_fields = frozenset(("pinst_cnt", "pinst_list",
2744 "sinst_cnt", "sinst_list"))
2745 if inst_fields & frozenset(self.op.output_fields):
2746 inst_data = self.cfg.GetAllInstancesInfo()
2748 for inst in inst_data.values():
2749 if inst.primary_node in node_to_primary:
2750 node_to_primary[inst.primary_node].add(inst.name)
2751 for secnode in inst.secondary_nodes:
2752 if secnode in node_to_secondary:
2753 node_to_secondary[secnode].add(inst.name)
2755 master_node = self.cfg.GetMasterNode()
2757 # end data gathering
2760 for node in nodelist:
2762 for field in self.op.output_fields:
2763 if field in self._SIMPLE_FIELDS:
2764 val = getattr(node, field)
2765 elif field == "pinst_list":
2766 val = list(node_to_primary[node.name])
2767 elif field == "sinst_list":
2768 val = list(node_to_secondary[node.name])
2769 elif field == "pinst_cnt":
2770 val = len(node_to_primary[node.name])
2771 elif field == "sinst_cnt":
2772 val = len(node_to_secondary[node.name])
2773 elif field == "pip":
2774 val = node.primary_ip
2775 elif field == "sip":
2776 val = node.secondary_ip
2777 elif field == "tags":
2778 val = list(node.GetTags())
2779 elif field == "master":
2780 val = node.name == master_node
2781 elif self._FIELDS_DYNAMIC.Matches(field):
2782 val = live_data[node.name].get(field, None)
2783 elif field == "role":
2784 if node.name == master_node:
2786 elif node.master_candidate:
2795 raise errors.ParameterError(field)
2796 node_output.append(val)
2797 output.append(node_output)
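
# Sketch of the instance-to-node reverse maps built in
# LUQueryNodes.Exec, using plain tuples instead of instance objects;
# the instance data passed in is hypothetical.
def _ExampleNodeInstanceMaps(nodenames, instances):
  """instances: iterable of (name, primary_node, secondary_nodes)."""
  node_to_primary = dict((name, set()) for name in nodenames)
  node_to_secondary = dict((name, set()) for name in nodenames)
  for iname, pnode, snodes in instances:
    if pnode in node_to_primary:
      node_to_primary[pnode].add(iname)
    for snode in snodes:
      if snode in node_to_secondary:
        node_to_secondary[snode].add(iname)
  return node_to_primary, node_to_secondary
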
2802 class LUQueryNodeVolumes(NoHooksLU):
2803 """Logical unit for getting volumes on node(s).
2806 _OP_REQP = ["nodes", "output_fields"]
2808 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2809 _FIELDS_STATIC = utils.FieldSet("node")
2811 def ExpandNames(self):
2812 _CheckOutputFields(static=self._FIELDS_STATIC,
2813 dynamic=self._FIELDS_DYNAMIC,
2814 selected=self.op.output_fields)
2816 self.needed_locks = {}
2817 self.share_locks[locking.LEVEL_NODE] = 1
2818 if not self.op.nodes:
2819 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2821 self.needed_locks[locking.LEVEL_NODE] = \
2822 _GetWantedNodes(self, self.op.nodes)
2824 def CheckPrereq(self):
2825 """Check prerequisites.
2827 This checks that the requested fields are valid output fields.
2830 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2832 def Exec(self, feedback_fn):
2833 """Computes the list of nodes and their attributes.
2836 nodenames = self.nodes
2837 volumes = self.rpc.call_node_volumes(nodenames)
2839 ilist = [self.cfg.GetInstanceInfo(iname) for iname
2840 in self.cfg.GetInstanceList()]
2842 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2845 for node in nodenames:
2846 nresult = volumes[node]
2849 msg = nresult.fail_msg
2851 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2854 node_vols = nresult.payload[:]
2855 node_vols.sort(key=lambda vol: vol['dev'])
2857 for vol in node_vols:
2859 for field in self.op.output_fields:
2862 elif field == "phys":
2866 elif field == "name":
2868 elif field == "size":
2869 val = int(float(vol['size']))
2870 elif field == "instance":
2872 if node not in lv_by_node[inst]:
2874 if vol['name'] in lv_by_node[inst][node]:
2880 raise errors.ParameterError(field)
2881 node_output.append(str(val))
2883 output.append(node_output)
2888 class LUQueryNodeStorage(NoHooksLU):
2889 """Logical unit for getting information on storage units on node(s).
2892 _OP_REQP = ["nodes", "storage_type", "output_fields"]
2894 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
2896 def ExpandNames(self):
2897 storage_type = self.op.storage_type
2899 if storage_type not in constants.VALID_STORAGE_TYPES:
2900 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2903 _CheckOutputFields(static=self._FIELDS_STATIC,
2904 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
2905 selected=self.op.output_fields)
2907 self.needed_locks = {}
2908 self.share_locks[locking.LEVEL_NODE] = 1
2911 self.needed_locks[locking.LEVEL_NODE] = \
2912 _GetWantedNodes(self, self.op.nodes)
2914 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2916 def CheckPrereq(self):
2917 """Check prerequisites.
2919 This checks that the requested fields are valid output fields.
2922 self.op.name = getattr(self.op, "name", None)
2924 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2926 def Exec(self, feedback_fn):
2927 """Computes the list of nodes and their attributes.
2930 # Always get name to sort by
2931 if constants.SF_NAME in self.op.output_fields:
2932 fields = self.op.output_fields[:]
2934 fields = [constants.SF_NAME] + self.op.output_fields
2936 # Never ask for node or type as it's only known to the LU
2937 for extra in [constants.SF_NODE, constants.SF_TYPE]:
2938 while extra in fields:
2939 fields.remove(extra)
2941 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2942 name_idx = field_idx[constants.SF_NAME]
2944 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2945 data = self.rpc.call_storage_list(self.nodes,
2946 self.op.storage_type, st_args,
2947 self.op.name, fields)
2951 for node in utils.NiceSort(self.nodes):
2952 nresult = data[node]
2956 msg = nresult.fail_msg
2958 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2961 rows = dict([(row[name_idx], row) for row in nresult.payload])
2963 for name in utils.NiceSort(rows.keys()):
2968 for field in self.op.output_fields:
2969 if field == constants.SF_NODE:
2971 elif field == constants.SF_TYPE:
2972 val = self.op.storage_type
2973 elif field in field_idx:
2974 val = row[field_idx[field]]
2976 raise errors.ParameterError(field)
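
# Sketch of the column bookkeeping in LUQueryNodeStorage.Exec: the RPC
# returns rows ordered like the requested fields, so a name-to-index
# map addresses columns by field name.  The fields and row below are
# hypothetical example data.
def _ExampleFieldIndex():
  fields = ["name", "size", "allocatable"]
  field_idx = dict((name, idx) for (idx, name) in enumerate(fields))
  row = ["xenvg/disk0", 10240, True]
  return row[field_idx["size"]]
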
2985 class LUModifyNodeStorage(NoHooksLU):
2986 """Logical unit for modifying a storage volume on a node.
2989 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2992 def CheckArguments(self):
2993 self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
2995 storage_type = self.op.storage_type
2996 if storage_type not in constants.VALID_STORAGE_TYPES:
2997 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
3000 def ExpandNames(self):
3001 self.needed_locks = {
3002 locking.LEVEL_NODE: self.op.node_name,
3005 def CheckPrereq(self):
3006 """Check prerequisites.
3009 storage_type = self.op.storage_type
3012 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3014 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3015 " modified" % storage_type,
3018 diff = set(self.op.changes.keys()) - modifiable
3020 raise errors.OpPrereqError("The following fields can not be modified for"
3021 " storage units of type '%s': %r" %
3022 (storage_type, list(diff)),
3025 def Exec(self, feedback_fn):
3026 """Computes the list of nodes and their attributes.
3029 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3030 result = self.rpc.call_storage_modify(self.op.node_name,
3031 self.op.storage_type, st_args,
3032 self.op.name, self.op.changes)
3033 result.Raise("Failed to modify storage unit '%s' on %s" %
3034 (self.op.name, self.op.node_name))
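
# Sketch of the changed-fields whitelist check in
# LUModifyNodeStorage.CheckPrereq: a set difference against the
# modifiable fields yields exactly the offending keys.  The field
# names below are hypothetical.
def _ExampleModifiableDiff():
  modifiable = frozenset(["allocatable"])
  changes = {"allocatable": False, "size": 2048}
  diff = set(changes.keys()) - modifiable
  # a non-empty diff (here: set(["size"])) would be rejected
  return diff
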
3037 class LUAddNode(LogicalUnit):
3038 """Logical unit for adding node to the cluster.
3042 HTYPE = constants.HTYPE_NODE
3043 _OP_REQP = ["node_name"]
3045 def CheckArguments(self):
3046 # validate/normalize the node name
3047 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3049 def BuildHooksEnv(self):
3052 This will run on all nodes before, and on all nodes + the new node after.
3056 "OP_TARGET": self.op.node_name,
3057 "NODE_NAME": self.op.node_name,
3058 "NODE_PIP": self.op.primary_ip,
3059 "NODE_SIP": self.op.secondary_ip,
3061 nodes_0 = self.cfg.GetNodeList()
3062 nodes_1 = nodes_0 + [self.op.node_name, ]
3063 return env, nodes_0, nodes_1
3065 def CheckPrereq(self):
3066 """Check prerequisites.
3069 - the new node is not already in the config
3071 - its parameters (single/dual homed) match the cluster
3073 Any errors are signaled by raising errors.OpPrereqError.
3076 node_name = self.op.node_name
3079 dns_data = utils.GetHostInfo(node_name)
3081 node = dns_data.name
3082 primary_ip = self.op.primary_ip = dns_data.ip
3083 secondary_ip = getattr(self.op, "secondary_ip", None)
3084 if secondary_ip is None:
3085 secondary_ip = primary_ip
3086 if not utils.IsValidIP(secondary_ip):
3087 raise errors.OpPrereqError("Invalid secondary IP given",
3089 self.op.secondary_ip = secondary_ip
3091 node_list = cfg.GetNodeList()
3092 if not self.op.readd and node in node_list:
3093 raise errors.OpPrereqError("Node %s is already in the configuration" %
3094 node, errors.ECODE_EXISTS)
3095 elif self.op.readd and node not in node_list:
3096 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3099 for existing_node_name in node_list:
3100 existing_node = cfg.GetNodeInfo(existing_node_name)
3102 if self.op.readd and node == existing_node_name:
3103 if (existing_node.primary_ip != primary_ip or
3104 existing_node.secondary_ip != secondary_ip):
3105 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3106 " address configuration as before",
3110 if (existing_node.primary_ip == primary_ip or
3111 existing_node.secondary_ip == primary_ip or
3112 existing_node.primary_ip == secondary_ip or
3113 existing_node.secondary_ip == secondary_ip):
3114 raise errors.OpPrereqError("New node ip address(es) conflict with"
3115 " existing node %s" % existing_node.name,
3116 errors.ECODE_NOTUNIQUE)
3118 # check that the type of the node (single versus dual homed) is the
3119 # same as for the master
3120 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3121 master_singlehomed = myself.secondary_ip == myself.primary_ip
3122 newbie_singlehomed = secondary_ip == primary_ip
3123 if master_singlehomed != newbie_singlehomed:
3124 if master_singlehomed:
3125 raise errors.OpPrereqError("The master has no private ip but the"
3126 " new node has one",
3129 raise errors.OpPrereqError("The master has a private ip but the"
3130 " new node doesn't have one",
3133 # checks reachability
3134 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3135 raise errors.OpPrereqError("Node not reachable by ping",
3136 errors.ECODE_ENVIRON)
3138 if not newbie_singlehomed:
3139 # check reachability from my secondary ip to newbie's secondary ip
3140 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3141 source=myself.secondary_ip):
3142 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3143 " based ping to noded port",
3144 errors.ECODE_ENVIRON)
3151 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3154 self.new_node = self.cfg.GetNodeInfo(node)
3155 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3157 self.new_node = objects.Node(name=node,
3158 primary_ip=primary_ip,
3159 secondary_ip=secondary_ip,
3160 master_candidate=self.master_candidate,
3161 offline=False, drained=False)
3163 def Exec(self, feedback_fn):
3164 """Adds the new node to the cluster.
3167 new_node = self.new_node
3168 node = new_node.name
3170 # for re-adds, reset the offline/drained/master-candidate flags;
3171 # we need to reset here, otherwise offline would prevent RPC calls
3172 # later in the procedure; this also means that if the re-add
3173 # fails, we are left with a non-offlined, broken node
3175 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3176 self.LogInfo("Readding a node, the offline/drained flags were reset")
3177 # if we demote the node, we do cleanup later in the procedure
3178 new_node.master_candidate = self.master_candidate
3180 # notify the user about any possible mc promotion
3181 if new_node.master_candidate:
3182 self.LogInfo("Node will be a master candidate")
3184 # check connectivity
3185 result = self.rpc.call_version([node])[node]
3186 result.Raise("Can't get version information from node %s" % node)
3187 if constants.PROTOCOL_VERSION == result.payload:
3188 logging.info("Communication to node %s fine, sw version %s match",
3189 node, result.payload)
3191 raise errors.OpExecError("Version mismatch: master version %s,"
3192 " node version %s" %
3193 (constants.PROTOCOL_VERSION, result.payload))
3196 if self.cfg.GetClusterInfo().modify_ssh_setup:
3197 logging.info("Copy ssh key to node %s", node)
3198 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3200 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3201 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3205 keyarray.append(utils.ReadFile(i))
3207 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3208 keyarray[2], keyarray[3], keyarray[4],
3210 result.Raise("Cannot transfer ssh keys to the new node")
3212 # Add node to our /etc/hosts, and add key to known_hosts
3213 if self.cfg.GetClusterInfo().modify_etc_hosts:
3214 utils.AddHostToEtcHosts(new_node.name)
3216 if new_node.secondary_ip != new_node.primary_ip:
3217 result = self.rpc.call_node_has_ip_address(new_node.name,
3218 new_node.secondary_ip)
3219 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3220 prereq=True, ecode=errors.ECODE_ENVIRON)
3221 if not result.payload:
3222 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3223 " you gave (%s). Please fix and re-run this"
3224 " command." % new_node.secondary_ip)
3226 node_verify_list = [self.cfg.GetMasterNode()]
3227 node_verify_param = {
3228 constants.NV_NODELIST: [node],
3229 # TODO: do a node-net-test as well?
3232 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3233 self.cfg.GetClusterName())
3234 for verifier in node_verify_list:
3235 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3236 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3238 for failed in nl_payload:
3239 feedback_fn("ssh/hostname verification failed"
3240 " (checking from %s): %s" %
3241 (verifier, nl_payload[failed]))
3242 raise errors.OpExecError("ssh/hostname verification failed.")
3245 _RedistributeAncillaryFiles(self)
3246 self.context.ReaddNode(new_node)
3247 # make sure we redistribute the config
3248 self.cfg.Update(new_node, feedback_fn)
3249 # and make sure the new node will not have old files around
3250 if not new_node.master_candidate:
3251 result = self.rpc.call_node_demote_from_mc(new_node.name)
3252 msg = result.fail_msg
3254 self.LogWarning("Node failed to demote itself from master"
3255 " candidate status: %s" % msg)
3257 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3258 self.context.AddNode(new_node, self.proc.GetECId())
3261 class LUSetNodeParams(LogicalUnit):
3262 """Modifies the parameters of a node.
3265 HPATH = "node-modify"
3266 HTYPE = constants.HTYPE_NODE
3267 _OP_REQP = ["node_name"]
3270 def CheckArguments(self):
3271 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3272 _CheckBooleanOpField(self.op, 'master_candidate')
3273 _CheckBooleanOpField(self.op, 'offline')
3274 _CheckBooleanOpField(self.op, 'drained')
3275 _CheckBooleanOpField(self.op, 'auto_promote')
3276 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3277 if all_mods.count(None) == 3:
3278 raise errors.OpPrereqError("Please pass at least one modification",
3280 if all_mods.count(True) > 1:
3281 raise errors.OpPrereqError("Can't set the node into more than one"
3282 " state at the same time",
3285 # Boolean values that tell us whether we're offlining or draining
# the node, and whether we're reverting such a state
3286 self.offline_or_drain = (self.op.offline == True or
3287 self.op.drained == True)
3288 self.deoffline_or_drain = (self.op.offline == False or
3289 self.op.drained == False)
3290 self.might_demote = (self.op.master_candidate == False or
3291 self.offline_or_drain)
3293 self.lock_all = self.op.auto_promote and self.might_demote
3296 def ExpandNames(self):
3298 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3300 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3302 def BuildHooksEnv(self):
3305 This runs on the master node.
3309 "OP_TARGET": self.op.node_name,
3310 "MASTER_CANDIDATE": str(self.op.master_candidate),
3311 "OFFLINE": str(self.op.offline),
3312 "DRAINED": str(self.op.drained),
3314 nl = [self.cfg.GetMasterNode(),
3318 def CheckPrereq(self):
3319 """Check prerequisites.
3321 This only checks the instance list against the existing names.
3324 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3326 if (self.op.master_candidate is not None or
3327 self.op.drained is not None or
3328 self.op.offline is not None):
3329 # we can't change the master's node flags
3330 if self.op.node_name == self.cfg.GetMasterNode():
3331 raise errors.OpPrereqError("The master role can be changed"
3332 " only via masterfailover",
3336 if node.master_candidate and self.might_demote and not self.lock_all:
3337 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3338 # check if after removing the current node, we're missing master
3339 # candidates
3340 (mc_remaining, mc_should, _) = \
3341 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3342 if mc_remaining != mc_should:
3343 raise errors.OpPrereqError("Not enough master candidates, please"
3344 " pass auto_promote to allow promotion",
3347 if (self.op.master_candidate == True and
3348 ((node.offline and not self.op.offline == False) or
3349 (node.drained and not self.op.drained == False))):
3350 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3351 " to master_candidate" % node.name,
3354 # If we're being de-offlined or de-drained, we'll promote ourselves
# to master candidate if needed
3355 if (self.deoffline_or_drain and not self.offline_or_drain and not
3356 self.op.master_candidate == True and not node.master_candidate):
3357 self.op.master_candidate = _DecideSelfPromotion(self)
3358 if self.op.master_candidate:
3359 self.LogInfo("Autopromoting node to master candidate")
3363 def Exec(self, feedback_fn):
3372 if self.op.offline is not None:
3373 node.offline = self.op.offline
3374 result.append(("offline", str(self.op.offline)))
3375 if self.op.offline == True:
3376 if node.master_candidate:
3377 node.master_candidate = False
3379 result.append(("master_candidate", "auto-demotion due to offline"))
3381 node.drained = False
3382 result.append(("drained", "clear drained status due to offline"))
3384 if self.op.master_candidate is not None:
3385 node.master_candidate = self.op.master_candidate
3387 result.append(("master_candidate", str(self.op.master_candidate)))
3388 if self.op.master_candidate == False:
3389 rrc = self.rpc.call_node_demote_from_mc(node.name)
3392 self.LogWarning("Node failed to demote itself: %s" % msg)
3394 if self.op.drained is not None:
3395 node.drained = self.op.drained
3396 result.append(("drained", str(self.op.drained)))
3397 if self.op.drained == True:
3398 if node.master_candidate:
3399 node.master_candidate = False
3401 result.append(("master_candidate", "auto-demotion due to drain"))
3402 rrc = self.rpc.call_node_demote_from_mc(node.name)
3405 self.LogWarning("Node failed to demote itself: %s" % msg)
3407 node.offline = False
3408 result.append(("offline", "clear offline status due to drain"))
3410 # we locked all nodes, we adjust the CP before updating this node
3412 _AdjustCandidatePool(self, [node.name])
3414 # this will trigger configuration file update, if needed
3415 self.cfg.Update(node, feedback_fn)
3417 # this will trigger job queue propagation or cleanup
3419 self.context.ReaddNode(node)
3424 class LUPowercycleNode(NoHooksLU):
3425 """Powercycles a node.
3428 _OP_REQP = ["node_name", "force"]
3431 def CheckArguments(self):
3432 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3433 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3434 raise errors.OpPrereqError("The node is the master and the force"
3435 " parameter was not set",
3438 def ExpandNames(self):
3439 """Locking for PowercycleNode.
3441 This is a last-resort option and shouldn't block on other
3442 jobs. Therefore, we grab no locks.
3445 self.needed_locks = {}
3447 def CheckPrereq(self):
3448 """Check prerequisites.
3450 This LU has no prereqs.
3455 def Exec(self, feedback_fn):
3459 result = self.rpc.call_node_powercycle(self.op.node_name,
3460 self.cfg.GetHypervisorType())
3461 result.Raise("Failed to schedule the reboot")
3462 return result.payload
3465 class LUQueryClusterInfo(NoHooksLU):
3466 """Query cluster configuration.
3472 def ExpandNames(self):
3473 self.needed_locks = {}
3475 def CheckPrereq(self):
3476 """No prerequsites needed for this LU.
3481 def Exec(self, feedback_fn):
3482 """Return cluster config.
3485 cluster = self.cfg.GetClusterInfo()
3488 # Filter just for enabled hypervisors
3489 for os_name, hv_dict in cluster.os_hvp.items():
3490 os_hvp[os_name] = {}
3491 for hv_name, hv_params in hv_dict.items():
3492 if hv_name in cluster.enabled_hypervisors:
3493 os_hvp[os_name][hv_name] = hv_params
3496 "software_version": constants.RELEASE_VERSION,
3497 "protocol_version": constants.PROTOCOL_VERSION,
3498 "config_version": constants.CONFIG_VERSION,
3499 "os_api_version": max(constants.OS_API_VERSIONS),
3500 "export_version": constants.EXPORT_VERSION,
3501 "architecture": (platform.architecture()[0], platform.machine()),
3502 "name": cluster.cluster_name,
3503 "master": cluster.master_node,
3504 "default_hypervisor": cluster.enabled_hypervisors[0],
3505 "enabled_hypervisors": cluster.enabled_hypervisors,
3506 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3507 for hypervisor_name in cluster.enabled_hypervisors]),
3509 "beparams": cluster.beparams,
3510 "nicparams": cluster.nicparams,
3511 "candidate_pool_size": cluster.candidate_pool_size,
3512 "master_netdev": cluster.master_netdev,
3513 "volume_group_name": cluster.volume_group_name,
3514 "file_storage_dir": cluster.file_storage_dir,
3515 "ctime": cluster.ctime,
3516 "mtime": cluster.mtime,
3517 "uuid": cluster.uuid,
3518 "tags": list(cluster.GetTags()),
3524 class LUQueryConfigValues(NoHooksLU):
3525 """Return configuration values.
3530 _FIELDS_DYNAMIC = utils.FieldSet()
3531 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3534 def ExpandNames(self):
3535 self.needed_locks = {}
3537 _CheckOutputFields(static=self._FIELDS_STATIC,
3538 dynamic=self._FIELDS_DYNAMIC,
3539 selected=self.op.output_fields)
3541 def CheckPrereq(self):
3542 """No prerequisites.
3547 def Exec(self, feedback_fn):
3548 """Dump a representation of the cluster config to the standard output.
3552 for field in self.op.output_fields:
3553 if field == "cluster_name":
3554 entry = self.cfg.GetClusterName()
3555 elif field == "master_node":
3556 entry = self.cfg.GetMasterNode()
3557 elif field == "drain_flag":
3558 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3559 elif field == "watcher_pause":
3560 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3562 raise errors.ParameterError(field)
3563 values.append(entry)
3567 class LUActivateInstanceDisks(NoHooksLU):
3568 """Bring up an instance's disks.
3571 _OP_REQP = ["instance_name"]
3574 def ExpandNames(self):
3575 self._ExpandAndLockInstance()
3576 self.needed_locks[locking.LEVEL_NODE] = []
3577 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3579 def DeclareLocks(self, level):
3580 if level == locking.LEVEL_NODE:
3581 self._LockInstancesNodes()
3583 def CheckPrereq(self):
3584 """Check prerequisites.
3586 This checks that the instance is in the cluster.
3589 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3590 assert self.instance is not None, \
3591 "Cannot retrieve locked instance %s" % self.op.instance_name
3592 _CheckNodeOnline(self, self.instance.primary_node)
3593 if not hasattr(self.op, "ignore_size"):
3594 self.op.ignore_size = False
3596 def Exec(self, feedback_fn):
3597 """Activate the disks.
3600 disks_ok, disks_info = \
3601 _AssembleInstanceDisks(self, self.instance,
3602 ignore_size=self.op.ignore_size)
3604 raise errors.OpExecError("Cannot activate block devices")
3609 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3611 """Prepare the block devices for an instance.
3613 This sets up the block devices on all nodes.
3615 @type lu: L{LogicalUnit}
3616 @param lu: the logical unit on whose behalf we execute
3617 @type instance: L{objects.Instance}
3618 @param instance: the instance for whose disks we assemble
3619 @type ignore_secondaries: boolean
3620 @param ignore_secondaries: if true, errors on secondary nodes
3621 won't result in an error return from the function
3622 @type ignore_size: boolean
3623 @param ignore_size: if true, the current known size of the disk
3624 will not be used during the disk activation, useful for cases
3625 when the size is wrong
3626 @return: False if the operation failed, otherwise a list of
3627 (host, instance_visible_name, node_visible_name)
3628 with the mapping from node devices to instance devices
3633 iname = instance.name
3634 # With the two-pass mechanism we try to reduce the window of
3635 # opportunity for the race condition of switching DRBD to primary
3636 # before the handshake has occurred, but we do not eliminate it
3638 # The proper fix would be to wait (with some limits) until the
3639 # connection has been made and drbd transitions from WFConnection
3640 # into any other network-connected state (Connected, SyncTarget,
3643 # 1st pass, assemble on all nodes in secondary mode
3644 for inst_disk in instance.disks:
3645 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3647 node_disk = node_disk.Copy()
3648 node_disk.UnsetSize()
3649 lu.cfg.SetDiskID(node_disk, node)
3650 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3651 msg = result.fail_msg
3653 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3654 " (is_primary=False, pass=1): %s",
3655 inst_disk.iv_name, node, msg)
3656 if not ignore_secondaries:
3659 # FIXME: race condition on drbd migration to primary
3661 # 2nd pass, do only the primary node
3662 for inst_disk in instance.disks:
3665 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3666 if node != instance.primary_node:
3669 node_disk = node_disk.Copy()
3670 node_disk.UnsetSize()
3671 lu.cfg.SetDiskID(node_disk, node)
3672 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3673 msg = result.fail_msg
3675 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3676 " (is_primary=True, pass=2): %s",
3677 inst_disk.iv_name, node, msg)
3680 dev_path = result.payload
3682 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3684 # leave the disks configured for the primary node
3685 # this is a workaround that would be fixed better by
3686 # improving the logical/physical id handling
3687 for disk in instance.disks:
3688 lu.cfg.SetDiskID(disk, instance.primary_node)
3690 return disks_ok, device_info
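
# Hedged sketch of the two-pass ordering implemented by
# _AssembleInstanceDisks, with the RPC replaced by a hypothetical
# assemble_fn(node, disk, as_primary) callback: every node assembles
# in secondary mode first, and only then does the primary node switch
# its devices to primary.
def _ExampleTwoPassAssemble(instance_nodes, primary_node, disks,
                            assemble_fn):
  for disk in disks:
    for node in instance_nodes:
      assemble_fn(node, disk, False)       # 1st pass: secondary everywhere
  for disk in disks:
    assemble_fn(primary_node, disk, True)  # 2nd pass: primary node only
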
3693 def _StartInstanceDisks(lu, instance, force):
3694 """Start the disks of an instance.
3697 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3698 ignore_secondaries=force)
3700 _ShutdownInstanceDisks(lu, instance)
3701 if force is not None and not force:
3702 lu.proc.LogWarning("", hint="If the message above refers to a"
3704 " you can retry the operation using '--force'.")
3705 raise errors.OpExecError("Disk consistency error")
3708 class LUDeactivateInstanceDisks(NoHooksLU):
3709 """Shutdown an instance's disks.
3712 _OP_REQP = ["instance_name"]
3715 def ExpandNames(self):
3716 self._ExpandAndLockInstance()
3717 self.needed_locks[locking.LEVEL_NODE] = []
3718 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3720 def DeclareLocks(self, level):
3721 if level == locking.LEVEL_NODE:
3722 self._LockInstancesNodes()
3724 def CheckPrereq(self):
3725 """Check prerequisites.
3727 This checks that the instance is in the cluster.
3730 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3731 assert self.instance is not None, \
3732 "Cannot retrieve locked instance %s" % self.op.instance_name
3734 def Exec(self, feedback_fn):
3735 """Deactivate the disks
3738 instance = self.instance
3739 _SafeShutdownInstanceDisks(self, instance)
3742 def _SafeShutdownInstanceDisks(lu, instance):
3743 """Shutdown block devices of an instance.
3745 This function checks that an instance is not running before calling
3746 _ShutdownInstanceDisks.
3749 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
3750 _ShutdownInstanceDisks(lu, instance)
3753 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3754 """Shutdown block devices of an instance.
3756 This does the shutdown on all nodes of the instance.
3758 If ignore_primary is false, errors on the primary node are treated
3759 as fatal and cause the shutdown to be reported as failed.
3763 for disk in instance.disks:
3764 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3765 lu.cfg.SetDiskID(top_disk, node)
3766 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3767 msg = result.fail_msg
3769 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3770 disk.iv_name, node, msg)
3771 if not ignore_primary or node != instance.primary_node:
3776 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3777 """Checks if a node has enough free memory.
3779 This function checks if a given node has the needed amount of free
3780 memory. In case the node has less memory or we cannot get the
3781 information from the node, this function raises an OpPrereqError
3782 exception.
3784 @type lu: C{LogicalUnit}
3785 @param lu: a logical unit from which we get configuration data
3787 @param node: the node to check
3788 @type reason: C{str}
3789 @param reason: string to use in the error message
3790 @type requested: C{int}
3791 @param requested: the amount of memory in MiB to check for
3792 @type hypervisor_name: C{str}
3793 @param hypervisor_name: the hypervisor to ask for memory stats
3794 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3795 we cannot check the node
3798 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3799 nodeinfo[node].Raise("Can't get data from node %s" % node,
3800 prereq=True, ecode=errors.ECODE_ENVIRON)
3801 free_mem = nodeinfo[node].payload.get('memory_free', None)
3802 if not isinstance(free_mem, int):
3803 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3804 " was '%s'" % (node, free_mem),
3805 errors.ECODE_ENVIRON)
3806 if requested > free_mem:
3807 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3808 " needed %s MiB, available %s MiB" %
3809 (node, reason, requested, free_mem),
3813 def _CheckNodesFreeDisk(lu, nodenames, requested):
3814 """Checks if nodes have enough free disk space in the default VG.
3816 This function checks if all given nodes have the needed amount of
3817 free disk. In case any node has less disk or we cannot get the
3818 information from the node, this function raises an OpPrereqError
3819 exception.
3821 @type lu: C{LogicalUnit}
3822 @param lu: a logical unit from which we get configuration data
3823 @type nodenames: C{list}
3824 @param nodenames: the list of node names to check
3825 @type requested: C{int}
3826 @param requested: the amount of disk in MiB to check for
3827 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
3828 we cannot check the node
3831 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3832 lu.cfg.GetHypervisorType())
3833 for node in nodenames:
3834 info = nodeinfo[node]
3835 info.Raise("Cannot get current information from node %s" % node,
3836 prereq=True, ecode=errors.ECODE_ENVIRON)
3837 vg_free = info.payload.get("vg_free", None)
3838 if not isinstance(vg_free, int):
3839 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
3840 " result was '%s'" % (node, vg_free),
3841 errors.ECODE_ENVIRON)
3842 if requested > vg_free:
3843 raise errors.OpPrereqError("Not enough disk space on target node %s:"
3844 " required %d MiB, available %d MiB" %
3845 (node, requested, vg_free),
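
# Hedged sketch of the defensive payload handling shared by the
# free-memory and free-disk checks above: a missing or non-integer
# value means "we cannot check" and must raise, instead of being
# treated as zero.  The error codes mirror the visible checks; the
# real code may use a more specific code for the "not enough" case.
def _ExampleCheckResource(payload, key, requested, what):
  value = payload.get(key, None)
  if not isinstance(value, int):
    raise errors.OpPrereqError("Can't compute %s, result was '%s'" %
                               (what, value), errors.ECODE_ENVIRON)
  if requested > value:
    raise errors.OpPrereqError("Not enough %s: needed %s MiB,"
                               " available %s MiB" %
                               (what, requested, value),
                               errors.ECODE_ENVIRON)
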
3849 class LUStartupInstance(LogicalUnit):
3850 """Starts an instance.
3853 HPATH = "instance-start"
3854 HTYPE = constants.HTYPE_INSTANCE
3855 _OP_REQP = ["instance_name", "force"]
3858 def ExpandNames(self):
3859 self._ExpandAndLockInstance()
3861 def BuildHooksEnv(self):
3864 This runs on master, primary and secondary nodes of the instance.
3868 "FORCE": self.op.force,
3870 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3871 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3874 def CheckPrereq(self):
3875 """Check prerequisites.
3877 This checks that the instance is in the cluster.
3880 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3881 assert self.instance is not None, \
3882 "Cannot retrieve locked instance %s" % self.op.instance_name
3885     self.beparams = getattr(self.op, "beparams", {})
3886     if self.beparams:
3887       if not isinstance(self.beparams, dict):
3888         raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3889                                    " dict" % (type(self.beparams), ),
3890                                    errors.ECODE_INVAL)
3891 # fill the beparams dict
3892 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3893 self.op.beparams = self.beparams
3896     self.hvparams = getattr(self.op, "hvparams", {})
3897     if self.hvparams:
3898       if not isinstance(self.hvparams, dict):
3899         raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3900                                    " dict" % (type(self.hvparams), ),
3901                                    errors.ECODE_INVAL)
3903 # check hypervisor parameter syntax (locally)
3904 cluster = self.cfg.GetClusterInfo()
3905 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3906     filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3907                                   instance.hvparams)
3908 filled_hvp.update(self.hvparams)
3909 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3910 hv_type.CheckParameterSyntax(filled_hvp)
3911 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3912 self.op.hvparams = self.hvparams
3914 _CheckNodeOnline(self, instance.primary_node)
3916 bep = self.cfg.GetClusterInfo().FillBE(instance)
3917 # check bridges existence
3918 _CheckInstanceBridgesExist(self, instance)
3920     remote_info = self.rpc.call_instance_info(instance.primary_node,
3921                                               instance.name,
3922 instance.hypervisor)
3923 remote_info.Raise("Error checking node %s" % instance.primary_node,
3924 prereq=True, ecode=errors.ECODE_ENVIRON)
3925 if not remote_info.payload: # not running already
3926 _CheckNodeFreeMemory(self, instance.primary_node,
3927 "starting instance %s" % instance.name,
3928 bep[constants.BE_MEMORY], instance.hypervisor)
3930 def Exec(self, feedback_fn):
3931 """Start the instance.
3934 instance = self.instance
3935 force = self.op.force
3937 self.cfg.MarkInstanceUp(instance.name)
3939 node_current = instance.primary_node
3941 _StartInstanceDisks(self, instance, force)
3943     result = self.rpc.call_instance_start(node_current, instance,
3944                                           self.hvparams, self.beparams)
3945     msg = result.fail_msg
3946     if msg:
3947       _ShutdownInstanceDisks(self, instance)
3948       raise errors.OpExecError("Could not start instance: %s" % msg)
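# Submission sketch (assuming the opcode names of this codebase's era): a
# client reaches this LU by submitting something like
#
#   op = opcodes.OpStartupInstance(instance_name="inst1.example.com",
#                                  force=False)
#
# optionally carrying one-off "hvparams"/"beparams" overrides, which
# CheckPrereq above validates before they reach the hypervisor.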
3951 class LURebootInstance(LogicalUnit):
3952 """Reboot an instance.
3955 HPATH = "instance-reboot"
3956 HTYPE = constants.HTYPE_INSTANCE
3957 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3960 def CheckArguments(self):
3961 """Check the arguments.
3964 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
3965 constants.DEFAULT_SHUTDOWN_TIMEOUT)
3967 def ExpandNames(self):
3968 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3969 constants.INSTANCE_REBOOT_HARD,
3970 constants.INSTANCE_REBOOT_FULL]:
3971 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3972 (constants.INSTANCE_REBOOT_SOFT,
3973 constants.INSTANCE_REBOOT_HARD,
3974 constants.INSTANCE_REBOOT_FULL))
3975 self._ExpandAndLockInstance()
3977 def BuildHooksEnv(self):
3980 This runs on master, primary and secondary nodes of the instance.
3984 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3985 "REBOOT_TYPE": self.op.reboot_type,
3986 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
3988 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3989 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3992 def CheckPrereq(self):
3993 """Check prerequisites.
3995 This checks that the instance is in the cluster.
3998 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3999 assert self.instance is not None, \
4000 "Cannot retrieve locked instance %s" % self.op.instance_name
4002 _CheckNodeOnline(self, instance.primary_node)
4004 # check bridges existence
4005 _CheckInstanceBridgesExist(self, instance)
4007 def Exec(self, feedback_fn):
4008 """Reboot the instance.
4011 instance = self.instance
4012 ignore_secondaries = self.op.ignore_secondaries
4013 reboot_type = self.op.reboot_type
4015 node_current = instance.primary_node
4017 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4018 constants.INSTANCE_REBOOT_HARD]:
4019 for disk in instance.disks:
4020 self.cfg.SetDiskID(disk, node_current)
4021       result = self.rpc.call_instance_reboot(node_current, instance,
4022                                              reboot_type,
4023                                              self.shutdown_timeout)
4024       result.Raise("Could not reboot instance")
4025     else:
4026       result = self.rpc.call_instance_shutdown(node_current, instance,
4027                                                self.shutdown_timeout)
4028       result.Raise("Could not shutdown instance for full reboot")
4029 _ShutdownInstanceDisks(self, instance)
4030 _StartInstanceDisks(self, instance, ignore_secondaries)
4031 result = self.rpc.call_instance_start(node_current, instance, None, None)
4032       msg = result.fail_msg
4033       if msg:
4034         _ShutdownInstanceDisks(self, instance)
4035         raise errors.OpExecError("Could not start instance for"
4036                                  " full reboot: %s" % msg)
4038 self.cfg.MarkInstanceUp(instance.name)
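# Note on the reboot types handled above: SOFT and HARD are both delegated
# to a single instance_reboot RPC on the primary node, while FULL is
# emulated master-side as a shutdown, disk deactivation/reactivation and a
# fresh start.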
4041 class LUShutdownInstance(LogicalUnit):
4042 """Shutdown an instance.
4045 HPATH = "instance-stop"
4046 HTYPE = constants.HTYPE_INSTANCE
4047 _OP_REQP = ["instance_name"]
4050 def CheckArguments(self):
4051 """Check the arguments.
4054 self.timeout = getattr(self.op, "timeout",
4055 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4057 def ExpandNames(self):
4058 self._ExpandAndLockInstance()
4060 def BuildHooksEnv(self):
4063 This runs on master, primary and secondary nodes of the instance.
4066 env = _BuildInstanceHookEnvByObject(self, self.instance)
4067 env["TIMEOUT"] = self.timeout
4068     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4069     return env, nl, nl
4071 def CheckPrereq(self):
4072 """Check prerequisites.
4074 This checks that the instance is in the cluster.
4077 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4078 assert self.instance is not None, \
4079 "Cannot retrieve locked instance %s" % self.op.instance_name
4080 _CheckNodeOnline(self, self.instance.primary_node)
4082 def Exec(self, feedback_fn):
4083 """Shutdown the instance.
4086 instance = self.instance
4087 node_current = instance.primary_node
4088 timeout = self.timeout
4089 self.cfg.MarkInstanceDown(instance.name)
4090 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4091     msg = result.fail_msg
4092     if msg:
4093       self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4095 _ShutdownInstanceDisks(self, instance)
4098 class LUReinstallInstance(LogicalUnit):
4099 """Reinstall an instance.
4102 HPATH = "instance-reinstall"
4103 HTYPE = constants.HTYPE_INSTANCE
4104 _OP_REQP = ["instance_name"]
4107 def ExpandNames(self):
4108 self._ExpandAndLockInstance()
4110 def BuildHooksEnv(self):
4113 This runs on master, primary and secondary nodes of the instance.
4116 env = _BuildInstanceHookEnvByObject(self, self.instance)
4117     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4118     return env, nl, nl
4120 def CheckPrereq(self):
4121 """Check prerequisites.
4123 This checks that the instance is in the cluster and is not running.
4126 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4127 assert instance is not None, \
4128 "Cannot retrieve locked instance %s" % self.op.instance_name
4129 _CheckNodeOnline(self, instance.primary_node)
4131 if instance.disk_template == constants.DT_DISKLESS:
4132 raise errors.OpPrereqError("Instance '%s' has no disks" %
4133                                  self.op.instance_name,
4134                                  errors.ECODE_INVAL)
4135 _CheckInstanceDown(self, instance, "cannot reinstall")
4137 self.op.os_type = getattr(self.op, "os_type", None)
4138 self.op.force_variant = getattr(self.op, "force_variant", False)
4139 if self.op.os_type is not None:
4141 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4142 result = self.rpc.call_os_get(pnode, self.op.os_type)
4143 result.Raise("OS '%s' not in supported OS list for primary node %s" %
4144 (self.op.os_type, pnode),
4145 prereq=True, ecode=errors.ECODE_INVAL)
4146 if not self.op.force_variant:
4147 _CheckOSVariant(result.payload, self.op.os_type)
4149 self.instance = instance
4151 def Exec(self, feedback_fn):
4152 """Reinstall the instance.
4155 inst = self.instance
4157 if self.op.os_type is not None:
4158 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4159 inst.os = self.op.os_type
4160 self.cfg.Update(inst, feedback_fn)
4162 _StartInstanceDisks(self, inst, None)
4164 feedback_fn("Running the instance OS create scripts...")
4165 # FIXME: pass debug option from opcode to backend
4166 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4167 self.op.debug_level)
4168 result.Raise("Could not install OS for instance %s on node %s" %
4169 (inst.name, inst.primary_node))
4170     finally:
4171       _ShutdownInstanceDisks(self, inst)
4174 class LURecreateInstanceDisks(LogicalUnit):
4175 """Recreate an instance's missing disks.
4178 HPATH = "instance-recreate-disks"
4179 HTYPE = constants.HTYPE_INSTANCE
4180 _OP_REQP = ["instance_name", "disks"]
4183 def CheckArguments(self):
4184 """Check the arguments.
4187 if not isinstance(self.op.disks, list):
4188 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4189 for item in self.op.disks:
4190       if (not isinstance(item, int) or
4191           item < 0):
4192         raise errors.OpPrereqError("Invalid disk specification '%s'" %
4193                                    str(item), errors.ECODE_INVAL)
4195 def ExpandNames(self):
4196 self._ExpandAndLockInstance()
4198 def BuildHooksEnv(self):
4201 This runs on master, primary and secondary nodes of the instance.
4204 env = _BuildInstanceHookEnvByObject(self, self.instance)
4205     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4206     return env, nl, nl
4208 def CheckPrereq(self):
4209 """Check prerequisites.
4211 This checks that the instance is in the cluster and is not running.
4214 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4215 assert instance is not None, \
4216 "Cannot retrieve locked instance %s" % self.op.instance_name
4217 _CheckNodeOnline(self, instance.primary_node)
4219 if instance.disk_template == constants.DT_DISKLESS:
4220 raise errors.OpPrereqError("Instance '%s' has no disks" %
4221 self.op.instance_name, errors.ECODE_INVAL)
4222 _CheckInstanceDown(self, instance, "cannot recreate disks")
4224 if not self.op.disks:
4225 self.op.disks = range(len(instance.disks))
4227 for idx in self.op.disks:
4228 if idx >= len(instance.disks):
4229 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4232 self.instance = instance
4234 def Exec(self, feedback_fn):
4235 """Recreate the disks.
4239 for idx, _ in enumerate(self.instance.disks):
4240 if idx not in self.op.disks: # disk idx has not been passed in
4244 _CreateDisks(self, self.instance, to_skip=to_skip)
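# Example: on a three-disk instance, an opcode with disks=[1] yields
# to_skip=[0, 2], so only disk 1 is re-created; an empty disks list is
# expanded by CheckPrereq above to cover all disks, giving an empty to_skip.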
4247 class LURenameInstance(LogicalUnit):
4248 """Rename an instance.
4251 HPATH = "instance-rename"
4252 HTYPE = constants.HTYPE_INSTANCE
4253 _OP_REQP = ["instance_name", "new_name"]
4255 def BuildHooksEnv(self):
4258 This runs on master, primary and secondary nodes of the instance.
4261 env = _BuildInstanceHookEnvByObject(self, self.instance)
4262 env["INSTANCE_NEW_NAME"] = self.op.new_name
4263     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4264     return env, nl, nl
4266 def CheckPrereq(self):
4267 """Check prerequisites.
4269 This checks that the instance is in the cluster and is not running.
4272 self.op.instance_name = _ExpandInstanceName(self.cfg,
4273 self.op.instance_name)
4274 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4275 assert instance is not None
4276 _CheckNodeOnline(self, instance.primary_node)
4277 _CheckInstanceDown(self, instance, "cannot rename")
4278 self.instance = instance
4280 # new name verification
4281 name_info = utils.GetHostInfo(self.op.new_name)
4283 self.op.new_name = new_name = name_info.name
4284 instance_list = self.cfg.GetInstanceList()
4285 if new_name in instance_list:
4286 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4287 new_name, errors.ECODE_EXISTS)
4289 if not getattr(self.op, "ignore_ip", False):
4290 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4291 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4292 (name_info.ip, new_name),
4293 errors.ECODE_NOTUNIQUE)
4296 def Exec(self, feedback_fn):
4297 """Reinstall the instance.
4300 inst = self.instance
4301 old_name = inst.name
4303 if inst.disk_template == constants.DT_FILE:
4304 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4306 self.cfg.RenameInstance(inst.name, self.op.new_name)
4307 # Change the instance lock. This is definitely safe while we hold the BGL
4308 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4309 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4311 # re-read the instance from the configuration after rename
4312 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4314 if inst.disk_template == constants.DT_FILE:
4315 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4316 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4317 old_file_storage_dir,
4318 new_file_storage_dir)
4319 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4320 " (but the instance has been renamed in Ganeti)" %
4321 (inst.primary_node, old_file_storage_dir,
4322 new_file_storage_dir))
4324 _StartInstanceDisks(self, inst, None)
4325     try:
4326       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4327 old_name, self.op.debug_level)
4328       msg = result.fail_msg
4329       if msg:
4330         msg = ("Could not run OS rename script for instance %s on node %s"
4331                " (but the instance has been renamed in Ganeti): %s" %
4332                (inst.name, inst.primary_node, msg))
4333         self.proc.LogWarning(msg)
4334     finally:
4335       _ShutdownInstanceDisks(self, inst)
4338 class LURemoveInstance(LogicalUnit):
4339 """Remove an instance.
4342 HPATH = "instance-remove"
4343 HTYPE = constants.HTYPE_INSTANCE
4344 _OP_REQP = ["instance_name", "ignore_failures"]
4347 def CheckArguments(self):
4348 """Check the arguments.
4351 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4352 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4354 def ExpandNames(self):
4355 self._ExpandAndLockInstance()
4356 self.needed_locks[locking.LEVEL_NODE] = []
4357 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4359 def DeclareLocks(self, level):
4360 if level == locking.LEVEL_NODE:
4361 self._LockInstancesNodes()
4363 def BuildHooksEnv(self):
4366 This runs on master, primary and secondary nodes of the instance.
4369 env = _BuildInstanceHookEnvByObject(self, self.instance)
4370 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4371 nl = [self.cfg.GetMasterNode()]
4372 nl_post = list(self.instance.all_nodes) + nl
4373 return env, nl, nl_post
4375 def CheckPrereq(self):
4376 """Check prerequisites.
4378 This checks that the instance is in the cluster.
4381 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4382 assert self.instance is not None, \
4383 "Cannot retrieve locked instance %s" % self.op.instance_name
4385 def Exec(self, feedback_fn):
4386 """Remove the instance.
4389 instance = self.instance
4390 logging.info("Shutting down instance %s on node %s",
4391 instance.name, instance.primary_node)
4393 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4394 self.shutdown_timeout)
4395 msg = result.fail_msg
4397 if self.op.ignore_failures:
4398 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4400 raise errors.OpExecError("Could not shutdown instance %s on"
4402 (instance.name, instance.primary_node, msg))
4404 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4407 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4408 """Utility function to remove an instance.
4411 logging.info("Removing block devices for instance %s", instance.name)
4413 if not _RemoveDisks(lu, instance):
4414 if not ignore_failures:
4415 raise errors.OpExecError("Can't remove instance's disks")
4416 feedback_fn("Warning: can't remove instance's disks")
4418 logging.info("Removing instance %s out of cluster config", instance.name)
4420 lu.cfg.RemoveInstance(instance.name)
4422 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4423 "Instance lock removal conflict"
4425 # Remove lock for the instance
4426 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4429 class LUQueryInstances(NoHooksLU):
4430 """Logical unit for querying instances.
4433 # pylint: disable-msg=W0142
4434 _OP_REQP = ["output_fields", "names", "use_locking"]
4436 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4437 "serial_no", "ctime", "mtime", "uuid"]
4438   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4439                                     "admin_state",
4440 "disk_template", "ip", "mac", "bridge",
4441 "nic_mode", "nic_link",
4442 "sda_size", "sdb_size", "vcpus", "tags",
4443 "network_port", "beparams",
4444 r"(disk)\.(size)/([0-9]+)",
4445 r"(disk)\.(sizes)", "disk_usage",
4446 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4447 r"(nic)\.(bridge)/([0-9]+)",
4448 r"(nic)\.(macs|ips|modes|links|bridges)",
4449 r"(disk|nic)\.(count)",
4450                                     "hvparams",
4451                                     ] + _SIMPLE_FIELDS +
4452                                   ["hv/%s" % name
4453                                    for name in constants.HVS_PARAMETERS
4454                                    if name not in constants.HVC_GLOBALS] +
4455                                   ["be/%s" % name
4456                                    for name in constants.BES_PARAMETERS])
4457 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4460 def ExpandNames(self):
4461 _CheckOutputFields(static=self._FIELDS_STATIC,
4462 dynamic=self._FIELDS_DYNAMIC,
4463 selected=self.op.output_fields)
4465 self.needed_locks = {}
4466 self.share_locks[locking.LEVEL_INSTANCE] = 1
4467 self.share_locks[locking.LEVEL_NODE] = 1
4469     if self.op.names:
4470       self.wanted = _GetWantedInstances(self, self.op.names)
4471     else:
4472       self.wanted = locking.ALL_SET
4474 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4475 self.do_locking = self.do_node_query and self.op.use_locking
4476     if self.do_locking:
4477       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4478 self.needed_locks[locking.LEVEL_NODE] = []
4479 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4481 def DeclareLocks(self, level):
4482 if level == locking.LEVEL_NODE and self.do_locking:
4483 self._LockInstancesNodes()
4485 def CheckPrereq(self):
4486 """Check prerequisites.
4491 def Exec(self, feedback_fn):
4492 """Computes the list of nodes and their attributes.
4495 # pylint: disable-msg=R0912
4496 # way too many branches here
4497 all_info = self.cfg.GetAllInstancesInfo()
4498 if self.wanted == locking.ALL_SET:
4499 # caller didn't specify instance names, so ordering is not important
4500       if self.do_locking:
4501         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4502       else:
4503         instance_names = all_info.keys()
4504 instance_names = utils.NiceSort(instance_names)
4505     else:
4506       # caller did specify names, so we must keep the ordering
4507       if self.do_locking:
4508         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4509       else:
4510         tgt_set = all_info.keys()
4511       missing = set(self.wanted).difference(tgt_set)
4512       if missing:
4513         raise errors.OpExecError("Some instances were removed before"
4514 " retrieving their data: %s" % missing)
4515 instance_names = self.wanted
4517 instance_list = [all_info[iname] for iname in instance_names]
4519 # begin data gathering
4521 nodes = frozenset([inst.primary_node for inst in instance_list])
4522 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4524     bad_nodes = []
4525     off_nodes = []
4526     if self.do_node_query:
4527       live_data = {}
4528       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4529       for name in nodes:
4530         result = node_data[name]
4531         if result.offline:
4532           # offline nodes will be in both lists
4533           off_nodes.append(name)
4534         if result.fail_msg:
4535           bad_nodes.append(name)
4536         else:
4537           if result.payload:
4538             live_data.update(result.payload)
4539           # else no instance is alive
4540     else:
4541       live_data = dict([(name, {}) for name in instance_names])
4543 # end data gathering
4545     HVPREFIX = "hv/"
4546     BEPREFIX = "be/"
4547     output = []
4548     cluster = self.cfg.GetClusterInfo()
4549 for instance in instance_list:
4550       iout = []
4551       i_hv = cluster.FillHV(instance, skip_globals=True)
4552 i_be = cluster.FillBE(instance)
4553 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4554 nic.nicparams) for nic in instance.nics]
4555 for field in self.op.output_fields:
4556 st_match = self._FIELDS_STATIC.Matches(field)
4557 if field in self._SIMPLE_FIELDS:
4558 val = getattr(instance, field)
4559 elif field == "pnode":
4560 val = instance.primary_node
4561 elif field == "snodes":
4562 val = list(instance.secondary_nodes)
4563 elif field == "admin_state":
4564 val = instance.admin_up
4565 elif field == "oper_state":
4566           if instance.primary_node in bad_nodes:
4567             val = None
4568           else:
4569             val = bool(live_data.get(instance.name))
4570         elif field == "status":
4571           if instance.primary_node in off_nodes:
4572             val = "ERROR_nodeoffline"
4573           elif instance.primary_node in bad_nodes:
4574             val = "ERROR_nodedown"
4575           else:
4576             running = bool(live_data.get(instance.name))
4577             if running:
4578               if instance.admin_up:
4579                 val = "running"
4580               else:
4581                 val = "ERROR_up"
4582             else:
4583               if instance.admin_up:
4584                 val = "ERROR_down"
4585               else:
4586                 val = "ADMIN_down"
4587         elif field == "oper_ram":
4588           if instance.primary_node in bad_nodes:
4589             val = None
4590           elif instance.name in live_data:
4591             val = live_data[instance.name].get("memory", "?")
4592           else:
4593             val = "-"
4594 elif field == "vcpus":
4595 val = i_be[constants.BE_VCPUS]
4596 elif field == "disk_template":
4597 val = instance.disk_template
4598         elif field == "ip":
4599           if instance.nics:
4600             val = instance.nics[0].ip
4601           else:
4602             val = None
4603 elif field == "nic_mode":
4604           if instance.nics:
4605             val = i_nicp[0][constants.NIC_MODE]
4606           else:
4607             val = None
4608 elif field == "nic_link":
4609           if instance.nics:
4610             val = i_nicp[0][constants.NIC_LINK]
4611           else:
4612             val = None
4613 elif field == "bridge":
4614 if (instance.nics and
4615 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4616             val = i_nicp[0][constants.NIC_LINK]
4617           else:
4618             val = None
4619 elif field == "mac":
4620           if instance.nics:
4621             val = instance.nics[0].mac
4622           else:
4623             val = None
4624 elif field == "sda_size" or field == "sdb_size":
4625           idx = ord(field[2]) - ord('a')
4626           try:
4627             val = instance.FindDisk(idx).size
4628           except errors.OpPrereqError:
4629             val = None
4630 elif field == "disk_usage": # total disk usage per node
4631 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4632 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4633 elif field == "tags":
4634 val = list(instance.GetTags())
4635 elif field == "hvparams":
4637 elif (field.startswith(HVPREFIX) and
4638 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4639 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4640 val = i_hv.get(field[len(HVPREFIX):], None)
4641 elif field == "beparams":
4643 elif (field.startswith(BEPREFIX) and
4644 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4645 val = i_be.get(field[len(BEPREFIX):], None)
4646 elif st_match and st_match.groups():
4647 # matches a variable list
4648 st_groups = st_match.groups()
4649 if st_groups and st_groups[0] == "disk":
4650 if st_groups[1] == "count":
4651 val = len(instance.disks)
4652 elif st_groups[1] == "sizes":
4653 val = [disk.size for disk in instance.disks]
4654 elif st_groups[1] == "size":
4655             try:
4656               val = instance.FindDisk(st_groups[2]).size
4657             except errors.OpPrereqError:
4658               val = None
4659           else:
4660             assert False, "Unhandled disk parameter"
4661 elif st_groups[0] == "nic":
4662 if st_groups[1] == "count":
4663 val = len(instance.nics)
4664 elif st_groups[1] == "macs":
4665 val = [nic.mac for nic in instance.nics]
4666 elif st_groups[1] == "ips":
4667 val = [nic.ip for nic in instance.nics]
4668 elif st_groups[1] == "modes":
4669 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4670 elif st_groups[1] == "links":
4671 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4672 elif st_groups[1] == "bridges":
4673               val = []
4674               for nicp in i_nicp:
4675                 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4676                   val.append(nicp[constants.NIC_LINK])
4677                 else:
4678                   val.append(None)
4679             else:
4680               # index-based items
4681               nic_idx = int(st_groups[2])
4682               if nic_idx >= len(instance.nics):
4683                 val = None
4684               else:
4685 if st_groups[1] == "mac":
4686 val = instance.nics[nic_idx].mac
4687 elif st_groups[1] == "ip":
4688 val = instance.nics[nic_idx].ip
4689 elif st_groups[1] == "mode":
4690 val = i_nicp[nic_idx][constants.NIC_MODE]
4691 elif st_groups[1] == "link":
4692 val = i_nicp[nic_idx][constants.NIC_LINK]
4693 elif st_groups[1] == "bridge":
4694 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4695 if nic_mode == constants.NIC_MODE_BRIDGED:
4696                     val = i_nicp[nic_idx][constants.NIC_LINK]
4697                   else:
4698                     val = None
4699                 else:
4700                   assert False, "Unhandled NIC parameter"
4701           else:
4702             assert False, ("Declared but unhandled variable parameter '%s'" %
4703                            field)
4704         else:
4705           assert False, "Declared but unhandled parameter '%s'" % field
4706         iout.append(val)
4707       output.append(iout)
4709     return output
4712 class LUFailoverInstance(LogicalUnit):
4713 """Failover an instance.
4716 HPATH = "instance-failover"
4717 HTYPE = constants.HTYPE_INSTANCE
4718 _OP_REQP = ["instance_name", "ignore_consistency"]
4721 def CheckArguments(self):
4722 """Check the arguments.
4725 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4726 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4728 def ExpandNames(self):
4729 self._ExpandAndLockInstance()
4730 self.needed_locks[locking.LEVEL_NODE] = []
4731 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4733 def DeclareLocks(self, level):
4734 if level == locking.LEVEL_NODE:
4735 self._LockInstancesNodes()
4737 def BuildHooksEnv(self):
4740 This runs on master, primary and secondary nodes of the instance.
4743 instance = self.instance
4744 source_node = instance.primary_node
4745 target_node = instance.secondary_nodes[0]
4747 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4748 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4749 "OLD_PRIMARY": source_node,
4750 "OLD_SECONDARY": target_node,
4751 "NEW_PRIMARY": target_node,
4752 "NEW_SECONDARY": source_node,
4753       }
4754     env.update(_BuildInstanceHookEnvByObject(self, instance))
4755 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4756     nl_post = list(nl)
4757     nl_post.append(source_node)
4758 return env, nl, nl_post
4760 def CheckPrereq(self):
4761 """Check prerequisites.
4763 This checks that the instance is in the cluster.
4766 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4767 assert self.instance is not None, \
4768 "Cannot retrieve locked instance %s" % self.op.instance_name
4770 bep = self.cfg.GetClusterInfo().FillBE(instance)
4771 if instance.disk_template not in constants.DTS_NET_MIRROR:
4772 raise errors.OpPrereqError("Instance's disk layout is not"
4773 " network mirrored, cannot failover.",
4776 secondary_nodes = instance.secondary_nodes
4777 if not secondary_nodes:
4778 raise errors.ProgrammerError("no secondary node but using "
4779 "a mirrored disk template")
4781 target_node = secondary_nodes[0]
4782 _CheckNodeOnline(self, target_node)
4783 _CheckNodeNotDrained(self, target_node)
4784 if instance.admin_up:
4785 # check memory requirements on the secondary node
4786 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4787 instance.name, bep[constants.BE_MEMORY],
4788 instance.hypervisor)
4790 self.LogInfo("Not checking memory on the secondary node as"
4791 " instance will not be started")
4793     # check bridge existence
4794 _CheckInstanceBridgesExist(self, instance, node=target_node)
4796 def Exec(self, feedback_fn):
4797 """Failover an instance.
4799 The failover is done by shutting it down on its present node and
4800 starting it on the secondary.
4803 instance = self.instance
4805 source_node = instance.primary_node
4806 target_node = instance.secondary_nodes[0]
4808 if instance.admin_up:
4809 feedback_fn("* checking disk consistency between source and target")
4810 for dev in instance.disks:
4811 # for drbd, these are drbd over lvm
4812 if not _CheckDiskConsistency(self, dev, target_node, False):
4813 if not self.op.ignore_consistency:
4814 raise errors.OpExecError("Disk %s is degraded on target node,"
4815 " aborting failover." % dev.iv_name)
4817 feedback_fn("* not checking disk consistency as instance is not running")
4819 feedback_fn("* shutting down instance on source node")
4820 logging.info("Shutting down instance %s on node %s",
4821 instance.name, source_node)
4823 result = self.rpc.call_instance_shutdown(source_node, instance,
4824 self.shutdown_timeout)
4825 msg = result.fail_msg
4826     if msg:
4827       if self.op.ignore_consistency:
4828 self.proc.LogWarning("Could not shutdown instance %s on node %s."
4829 " Proceeding anyway. Please make sure node"
4830 " %s is down. Error details: %s",
4831 instance.name, source_node, source_node, msg)
4833 raise errors.OpExecError("Could not shutdown instance %s on"
4834                                  " node %s: %s" %
4835                                  (instance.name, source_node, msg))
4837 feedback_fn("* deactivating the instance's disks on source node")
4838 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4839 raise errors.OpExecError("Can't shut down the instance's disks.")
4841 instance.primary_node = target_node
4842 # distribute new instance config to the other nodes
4843 self.cfg.Update(instance, feedback_fn)
4845 # Only start the instance if it's marked as up
4846 if instance.admin_up:
4847 feedback_fn("* activating the instance's disks on target node")
4848 logging.info("Starting instance %s on node %s",
4849 instance.name, target_node)
4851 disks_ok, _ = _AssembleInstanceDisks(self, instance,
4852 ignore_secondaries=True)
4853       if not disks_ok:
4854         _ShutdownInstanceDisks(self, instance)
4855 raise errors.OpExecError("Can't activate the instance's disks")
4857 feedback_fn("* starting the instance on the target node")
4858 result = self.rpc.call_instance_start(target_node, instance, None, None)
4859 msg = result.fail_msg
4860       if msg:
4861         _ShutdownInstanceDisks(self, instance)
4862 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4863 (instance.name, target_node, msg))
4866 class LUMigrateInstance(LogicalUnit):
4867 """Migrate an instance.
4869 This is migration without shutting down, compared to the failover,
4870 which is done with shutdown.
4873 HPATH = "instance-migrate"
4874 HTYPE = constants.HTYPE_INSTANCE
4875 _OP_REQP = ["instance_name", "live", "cleanup"]
4879 def ExpandNames(self):
4880 self._ExpandAndLockInstance()
4882 self.needed_locks[locking.LEVEL_NODE] = []
4883 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4885 self._migrater = TLMigrateInstance(self, self.op.instance_name,
4886 self.op.live, self.op.cleanup)
4887 self.tasklets = [self._migrater]
4889 def DeclareLocks(self, level):
4890 if level == locking.LEVEL_NODE:
4891 self._LockInstancesNodes()
4893 def BuildHooksEnv(self):
4896 This runs on master, primary and secondary nodes of the instance.
4899 instance = self._migrater.instance
4900 source_node = instance.primary_node
4901 target_node = instance.secondary_nodes[0]
4902 env = _BuildInstanceHookEnvByObject(self, instance)
4903 env["MIGRATE_LIVE"] = self.op.live
4904 env["MIGRATE_CLEANUP"] = self.op.cleanup
4906 "OLD_PRIMARY": source_node,
4907 "OLD_SECONDARY": target_node,
4908 "NEW_PRIMARY": target_node,
4909 "NEW_SECONDARY": source_node,
4910       })
4911     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4912     nl_post = list(nl)
4913     nl_post.append(source_node)
4914 return env, nl, nl_post
4917 class LUMoveInstance(LogicalUnit):
4918 """Move an instance by data-copying.
4921 HPATH = "instance-move"
4922 HTYPE = constants.HTYPE_INSTANCE
4923 _OP_REQP = ["instance_name", "target_node"]
4926 def CheckArguments(self):
4927 """Check the arguments.
4930 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4931 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4933 def ExpandNames(self):
4934 self._ExpandAndLockInstance()
4935 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
4936 self.op.target_node = target_node
4937 self.needed_locks[locking.LEVEL_NODE] = [target_node]
4938 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4940 def DeclareLocks(self, level):
4941 if level == locking.LEVEL_NODE:
4942 self._LockInstancesNodes(primary_only=True)
4944 def BuildHooksEnv(self):
4947 This runs on master, primary and secondary nodes of the instance.
4951 "TARGET_NODE": self.op.target_node,
4952 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4954 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4955 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4956                                        self.op.target_node]
4957     return env, nl, nl
4959 def CheckPrereq(self):
4960 """Check prerequisites.
4962 This checks that the instance is in the cluster.
4965 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4966 assert self.instance is not None, \
4967 "Cannot retrieve locked instance %s" % self.op.instance_name
4969 node = self.cfg.GetNodeInfo(self.op.target_node)
4970 assert node is not None, \
4971 "Cannot retrieve locked node %s" % self.op.target_node
4973 self.target_node = target_node = node.name
4975 if target_node == instance.primary_node:
4976 raise errors.OpPrereqError("Instance %s is already on the node %s" %
4977                                  (instance.name, target_node),
4978                                  errors.ECODE_STATE)
4980 bep = self.cfg.GetClusterInfo().FillBE(instance)
4982 for idx, dsk in enumerate(instance.disks):
4983 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
4984 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
4985 " cannot copy" % idx, errors.ECODE_STATE)
4987 _CheckNodeOnline(self, target_node)
4988 _CheckNodeNotDrained(self, target_node)
4990 if instance.admin_up:
4991 # check memory requirements on the secondary node
4992 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4993 instance.name, bep[constants.BE_MEMORY],
4994 instance.hypervisor)
4996 self.LogInfo("Not checking memory on the secondary node as"
4997 " instance will not be started")
4999     # check bridge existence
5000 _CheckInstanceBridgesExist(self, instance, node=target_node)
5002 def Exec(self, feedback_fn):
5003 """Move an instance.
5005 The move is done by shutting it down on its present node, copying
5006 the data over (slow) and starting it on the new node.
5009 instance = self.instance
5011 source_node = instance.primary_node
5012 target_node = self.target_node
5014 self.LogInfo("Shutting down instance %s on source node %s",
5015 instance.name, source_node)
5017 result = self.rpc.call_instance_shutdown(source_node, instance,
5018 self.shutdown_timeout)
5019 msg = result.fail_msg
5020     if msg:
5021       if self.op.ignore_consistency:
5022 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5023 " Proceeding anyway. Please make sure node"
5024 " %s is down. Error details: %s",
5025 instance.name, source_node, source_node, msg)
5027 raise errors.OpExecError("Could not shutdown instance %s on"
5029 (instance.name, source_node, msg))
5031 # create the target disks
5032     try:
5033       _CreateDisks(self, instance, target_node=target_node)
5034     except errors.OpExecError:
5035       self.LogWarning("Device creation failed, reverting...")
5036       try:
5037         _RemoveDisks(self, instance, target_node=target_node)
5038       finally:
5039         self.cfg.ReleaseDRBDMinors(instance.name)
5040         raise
5042 cluster_name = self.cfg.GetClusterInfo().cluster_name
5044     errs = []
5045     # activate, get path, copy the data over
5046 for idx, disk in enumerate(instance.disks):
5047 self.LogInfo("Copying data for disk %d", idx)
5048 result = self.rpc.call_blockdev_assemble(target_node, disk,
5049 instance.name, True)
5051 self.LogWarning("Can't assemble newly created disk %d: %s",
5052 idx, result.fail_msg)
5053 errs.append(result.fail_msg)
5055 dev_path = result.payload
5056 result = self.rpc.call_blockdev_export(source_node, disk,
5057 target_node, dev_path,
5060 self.LogWarning("Can't copy data over for disk %d: %s",
5061 idx, result.fail_msg)
5062 errs.append(result.fail_msg)
5066 self.LogWarning("Some disks failed to copy, aborting")
5068 _RemoveDisks(self, instance, target_node=target_node)
5070 self.cfg.ReleaseDRBDMinors(instance.name)
5071 raise errors.OpExecError("Errors during disk copy: %s" %
5074 instance.primary_node = target_node
5075 self.cfg.Update(instance, feedback_fn)
5077 self.LogInfo("Removing the disks on the original node")
5078 _RemoveDisks(self, instance, target_node=source_node)
5080 # Only start the instance if it's marked as up
5081 if instance.admin_up:
5082 self.LogInfo("Starting instance %s on node %s",
5083 instance.name, target_node)
5085 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5086 ignore_secondaries=True)
5087       if not disks_ok:
5088         _ShutdownInstanceDisks(self, instance)
5089 raise errors.OpExecError("Can't activate the instance's disks")
5091 result = self.rpc.call_instance_start(target_node, instance, None, None)
5092 msg = result.fail_msg
5093       if msg:
5094         _ShutdownInstanceDisks(self, instance)
5095 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5096 (instance.name, target_node, msg))
5099 class LUMigrateNode(LogicalUnit):
5100 """Migrate all instances from a node.
5103 HPATH = "node-migrate"
5104 HTYPE = constants.HTYPE_NODE
5105 _OP_REQP = ["node_name", "live"]
5108 def ExpandNames(self):
5109 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5111 self.needed_locks = {
5112       locking.LEVEL_NODE: [self.op.node_name],
5113       }
5115 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5117 # Create tasklets for migrating instances for all instances on this node
5118     names = []
5119     tasklets = []
5121     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5122 logging.debug("Migrating instance %s", inst.name)
5123 names.append(inst.name)
5125 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5127 self.tasklets = tasklets
5129 # Declare instance locks
5130 self.needed_locks[locking.LEVEL_INSTANCE] = names
5132 def DeclareLocks(self, level):
5133 if level == locking.LEVEL_NODE:
5134 self._LockInstancesNodes()
5136 def BuildHooksEnv(self):
5139 This runs on the master, the primary and all the secondaries.
5143 "NODE_NAME": self.op.node_name,
5146 nl = [self.cfg.GetMasterNode()]
5148 return (env, nl, nl)
5151 class TLMigrateInstance(Tasklet):
5152 def __init__(self, lu, instance_name, live, cleanup):
5153 """Initializes this class.
5156 Tasklet.__init__(self, lu)
5159 self.instance_name = instance_name
5160     self.live = live
5161     self.cleanup = cleanup
5163 def CheckPrereq(self):
5164 """Check prerequisites.
5166 This checks that the instance is in the cluster.
5169 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5170 instance = self.cfg.GetInstanceInfo(instance_name)
5171 assert instance is not None
5173 if instance.disk_template != constants.DT_DRBD8:
5174 raise errors.OpPrereqError("Instance's disk layout is not"
5175 " drbd8, cannot migrate.", errors.ECODE_STATE)
5177 secondary_nodes = instance.secondary_nodes
5178 if not secondary_nodes:
5179 raise errors.ConfigurationError("No secondary node but using"
5180 " drbd8 disk template")
5182 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5184 target_node = secondary_nodes[0]
5185 # check memory requirements on the secondary node
5186 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5187 instance.name, i_be[constants.BE_MEMORY],
5188 instance.hypervisor)
5190 # check bridge existance
5191 _CheckInstanceBridgesExist(self, instance, node=target_node)
5193 if not self.cleanup:
5194 _CheckNodeNotDrained(self, target_node)
5195       result = self.rpc.call_instance_migratable(instance.primary_node,
5196                                                  instance)
5197 result.Raise("Can't migrate, please use failover",
5198 prereq=True, ecode=errors.ECODE_STATE)
5200 self.instance = instance
5202 def _WaitUntilSync(self):
5203 """Poll with custom rpc for disk sync.
5205 This uses our own step-based rpc call.
5208 self.feedback_fn("* wait until resync is done")
5209     all_done = False
5210     while not all_done:
5211       all_done = True
5212       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5213                                             self.nodes_ip,
5214                                             self.instance.disks)
5215       min_percent = 100
5216 for node, nres in result.items():
5217 nres.Raise("Cannot resync disks on node %s" % node)
5218 node_done, node_percent = nres.payload
5219 all_done = all_done and node_done
5220 if node_percent is not None:
5221 min_percent = min(min_percent, node_percent)
5222       if not all_done:
5223         if min_percent < 100:
5224           self.feedback_fn("   - progress: %.1f%%" % min_percent)
5225         time.sleep(2)
5227 def _EnsureSecondary(self, node):
5228 """Demote a node to secondary.
5231 self.feedback_fn("* switching node %s to secondary mode" % node)
5233 for dev in self.instance.disks:
5234 self.cfg.SetDiskID(dev, node)
5236 result = self.rpc.call_blockdev_close(node, self.instance.name,
5237 self.instance.disks)
5238 result.Raise("Cannot change disk to secondary on node %s" % node)
5240 def _GoStandalone(self):
5241 """Disconnect from the network.
5244 self.feedback_fn("* changing into standalone mode")
5245 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5246 self.instance.disks)
5247 for node, nres in result.items():
5248 nres.Raise("Cannot disconnect disks node %s" % node)
5250 def _GoReconnect(self, multimaster):
5251 """Reconnect to the network.
5257 msg = "single-master"
5258 self.feedback_fn("* changing disks into %s mode" % msg)
5259 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5260 self.instance.disks,
5261 self.instance.name, multimaster)
5262 for node, nres in result.items():
5263 nres.Raise("Cannot change disks config on node %s" % node)
5265 def _ExecCleanup(self):
5266 """Try to cleanup after a failed migration.
5268 The cleanup is done by:
5269 - check that the instance is running only on one node
5270 (and update the config if needed)
5271 - change disks on its secondary node to secondary
5272 - wait until disks are fully synchronized
5273 - disconnect from the network
5274 - change disks into single-master mode
5275 - wait again until disks are fully synchronized
5278 instance = self.instance
5279 target_node = self.target_node
5280 source_node = self.source_node
5282 # check running on only one node
5283 self.feedback_fn("* checking where the instance actually runs"
5284 " (if this hangs, the hypervisor might be in"
5286 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5287 for node, result in ins_l.items():
5288 result.Raise("Can't contact node %s" % node)
5290 runningon_source = instance.name in ins_l[source_node].payload
5291 runningon_target = instance.name in ins_l[target_node].payload
5293 if runningon_source and runningon_target:
5294 raise errors.OpExecError("Instance seems to be running on two nodes,"
5295 " or the hypervisor is confused. You will have"
5296 " to ensure manually that it runs only on one"
5297 " and restart this operation.")
5299 if not (runningon_source or runningon_target):
5300 raise errors.OpExecError("Instance does not seem to be running at all."
5301 " In this case, it's safer to repair by"
5302 " running 'gnt-instance stop' to ensure disk"
5303 " shutdown, and then restarting it.")
5305 if runningon_target:
5306 # the migration has actually succeeded, we need to update the config
5307 self.feedback_fn("* instance running on secondary node (%s),"
5308 " updating config" % target_node)
5309 instance.primary_node = target_node
5310 self.cfg.Update(instance, self.feedback_fn)
5311 demoted_node = source_node
5313 self.feedback_fn("* instance confirmed to be running on its"
5314 " primary node (%s)" % source_node)
5315 demoted_node = target_node
5317 self._EnsureSecondary(demoted_node)
5318     try:
5319       self._WaitUntilSync()
5320     except errors.OpExecError:
5321       # we ignore here errors, since if the device is standalone, it
5322       # won't be able to sync
5323       pass
5324     self._GoStandalone()
5325 self._GoReconnect(False)
5326 self._WaitUntilSync()
5328 self.feedback_fn("* done")
5330 def _RevertDiskStatus(self):
5331 """Try to revert the disk status after a failed migration.
5334 target_node = self.target_node
5335     try:
5336       self._EnsureSecondary(target_node)
5337 self._GoStandalone()
5338 self._GoReconnect(False)
5339 self._WaitUntilSync()
5340 except errors.OpExecError, err:
5341 self.lu.LogWarning("Migration failed and I can't reconnect the"
5342 " drives: error '%s'\n"
5343 "Please look and recover the instance status" %
5346 def _AbortMigration(self):
5347 """Call the hypervisor code to abort a started migration.
5350 instance = self.instance
5351 target_node = self.target_node
5352 migration_info = self.migration_info
5354     abort_result = self.rpc.call_finalize_migration(target_node,
5355                                                     instance,
5356                                                     migration_info,
5357                                                     False)
5358     abort_msg = abort_result.fail_msg
5359     if abort_msg:
5360       logging.error("Aborting migration failed on target node %s: %s",
5361 target_node, abort_msg)
5362     # Don't raise an exception here, as we still have to try to revert the
5363 # disk status, even if this step failed.
5365 def _ExecMigration(self):
5366 """Migrate an instance.
5368 The migrate is done by:
5369 - change the disks into dual-master mode
5370 - wait until disks are fully synchronized again
5371 - migrate the instance
5372 - change disks on the new secondary node (the old primary) to secondary
5373 - wait until disks are fully synchronized
5374 - change disks into single-master mode
5377 instance = self.instance
5378 target_node = self.target_node
5379 source_node = self.source_node
5381 self.feedback_fn("* checking disk consistency between source and target")
5382 for dev in instance.disks:
5383 if not _CheckDiskConsistency(self, dev, target_node, False):
5384 raise errors.OpExecError("Disk %s is degraded or not fully"
5385 " synchronized on target node,"
5386 " aborting migrate." % dev.iv_name)
5388 # First get the migration information from the remote node
5389 result = self.rpc.call_migration_info(source_node, instance)
5390     msg = result.fail_msg
5391     if msg:
5392       log_err = ("Failed fetching source migration information from %s: %s" %
5393                  (source_node, msg))
5394       logging.error(log_err)
5395       raise errors.OpExecError(log_err)
5397 self.migration_info = migration_info = result.payload
5399 # Then switch the disks to master/master mode
5400 self._EnsureSecondary(target_node)
5401 self._GoStandalone()
5402 self._GoReconnect(True)
5403 self._WaitUntilSync()
5405 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5406     result = self.rpc.call_accept_instance(target_node,
5407                                            instance,
5408                                            migration_info,
5409                                            self.nodes_ip[target_node])
5411     msg = result.fail_msg
5412     if msg:
5413 logging.error("Instance pre-migration failed, trying to revert"
5414 " disk status: %s", msg)
5415 self.feedback_fn("Pre-migration failed, aborting")
5416 self._AbortMigration()
5417 self._RevertDiskStatus()
5418 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5419 (instance.name, msg))
5421 self.feedback_fn("* migrating instance to %s" % target_node)
5423 result = self.rpc.call_instance_migrate(source_node, instance,
5424                                             self.nodes_ip[target_node],
5425                                             self.live)
5426     msg = result.fail_msg
5427     if msg:
5428 logging.error("Instance migration failed, trying to revert"
5429 " disk status: %s", msg)
5430 self.feedback_fn("Migration failed, aborting")
5431 self._AbortMigration()
5432 self._RevertDiskStatus()
5433 raise errors.OpExecError("Could not migrate instance %s: %s" %
5434 (instance.name, msg))
5437 instance.primary_node = target_node
5438 # distribute new instance config to the other nodes
5439 self.cfg.Update(instance, self.feedback_fn)
5441     result = self.rpc.call_finalize_migration(target_node,
5442                                               instance,
5443                                               migration_info,
5444                                               True)
5445     msg = result.fail_msg
5446     if msg:
5447       logging.error("Instance migration succeeded, but finalization failed:"
5448                     " %s", msg)
5449       raise errors.OpExecError("Could not finalize instance migration: %s" %
5450                                msg)
5452 self._EnsureSecondary(source_node)
5453 self._WaitUntilSync()
5454 self._GoStandalone()
5455 self._GoReconnect(False)
5456 self._WaitUntilSync()
5458 self.feedback_fn("* done")
5460 def Exec(self, feedback_fn):
5461 """Perform the migration.
5464 feedback_fn("Migrating instance %s" % self.instance.name)
5466 self.feedback_fn = feedback_fn
5468 self.source_node = self.instance.primary_node
5469 self.target_node = self.instance.secondary_nodes[0]
5470 self.all_nodes = [self.source_node, self.target_node]
5471     self.nodes_ip = {
5472       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5473       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5474       }
5476     if self.cleanup:
5477       return self._ExecCleanup()
5478     else:
5479       return self._ExecMigration()
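# DRBD state walk performed by _ExecMigration above, in order: demote the
# target to secondary, go standalone, reconnect dual-master, live-migrate,
# demote the old primary, then standalone again and reconnect single-master.
# _ExecCleanup replays the tail of this walk to recover a half-done
# migration.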
5482 def _CreateBlockDev(lu, node, instance, device, force_create,
5483                     info, force_open):
5484 """Create a tree of block devices on a given node.
5486   If this device type has to be created on secondaries, create it and
5487   all its children.
5489 If not, just recurse to children keeping the same 'force' value.
5491 @param lu: the lu on whose behalf we execute
5492 @param node: the node on which to create the device
5493 @type instance: L{objects.Instance}
5494 @param instance: the instance which owns the device
5495 @type device: L{objects.Disk}
5496 @param device: the device to create
5497 @type force_create: boolean
5498 @param force_create: whether to force creation of this device; this
5499     will be changed to True whenever we find a device which has
5500 CreateOnSecondary() attribute
5501 @param info: the extra 'metadata' we should attach to the device
5502 (this will be represented as a LVM tag)
5503 @type force_open: boolean
5504   @param force_open: this parameter will be passed to the
5505 L{backend.BlockdevCreate} function where it specifies
5506 whether we run on primary or not, and it affects both
5507     the child assembly and the device's own Open() execution
5510   if device.CreateOnSecondary():
5511     force_create = True
5513   if device.children:
5514     for child in device.children:
5515       _CreateBlockDev(lu, node, instance, child, force_create,
5516                       info, force_open)
5518   if not force_create:
5519     return
5521   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
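# Recursion sketch: for a DRBD8 device with two LV children, the DRBD node
# reports CreateOnSecondary() == True, so force_create is turned on for the
# whole subtree; the LV children are then created first and the DRBD device
# itself last, and _CreateDisks repeats this on every relevant node.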
5524 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5525 """Create a single block device on a given node.
5527   This will not recurse over children of the device, so they must be
5528   created in advance.
5530 @param lu: the lu on whose behalf we execute
5531 @param node: the node on which to create the device
5532 @type instance: L{objects.Instance}
5533 @param instance: the instance which owns the device
5534 @type device: L{objects.Disk}
5535 @param device: the device to create
5536 @param info: the extra 'metadata' we should attach to the device
5537 (this will be represented as a LVM tag)
5538 @type force_open: boolean
5539   @param force_open: this parameter will be passed to the
5540 L{backend.BlockdevCreate} function where it specifies
5541 whether we run on primary or not, and it affects both
5542     the child assembly and the device's own Open() execution
5545 lu.cfg.SetDiskID(device, node)
5546 result = lu.rpc.call_blockdev_create(node, device, device.size,
5547 instance.name, force_open, info)
5548 result.Raise("Can't create block device %s on"
5549 " node %s for instance %s" % (device, node, instance.name))
5550 if device.physical_id is None:
5551 device.physical_id = result.payload
5554 def _GenerateUniqueNames(lu, exts):
5555 """Generate a suitable LV name.
5557   This will generate a logical volume name for the given instance.
5559   """
5560   results = []
5561   for val in exts:
5562     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5563     results.append("%s%s" % (new_id, val))
5564   return results
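# Example (hypothetical ID): with exts=[".disk0"] and a generated unique ID
# of "d41d8c...", this returns ["d41d8c....disk0"]; the DRBD8 path in
# _GenerateDiskTemplate below derives "<prefix>_data" and "<prefix>_meta"
# LV names from such a result.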
5567 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5568                          p_minor, s_minor):
5569   """Generate a drbd8 device complete with its children.
5571   """
5572 port = lu.cfg.AllocatePort()
5573 vgname = lu.cfg.GetVGName()
5574 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5575 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5576 logical_id=(vgname, names[0]))
5577 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5578 logical_id=(vgname, names[1]))
5579 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5580                           logical_id=(primary, secondary, port,
5581                                       p_minor, s_minor,
5582                                       shared_secret),
5583                           children=[dev_data, dev_meta],
5584                           iv_name=iv_name)
5585   return drbd_dev
5588 def _GenerateDiskTemplate(lu, template_name,
5589 instance_name, primary_node,
5590 secondary_nodes, disk_info,
5591                           file_storage_dir, file_driver,
5592                           base_index):
5593   """Generate the entire disk layout for a given template type.
5595   """
5596   #TODO: compute space requirements
5598 vgname = lu.cfg.GetVGName()
5599 disk_count = len(disk_info)
5600   disks = []
5601   if template_name == constants.DT_DISKLESS:
5602     pass
5603 elif template_name == constants.DT_PLAIN:
5604 if len(secondary_nodes) != 0:
5605 raise errors.ProgrammerError("Wrong template configuration")
5607 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5608 for i in range(disk_count)])
5609 for idx, disk in enumerate(disk_info):
5610 disk_index = idx + base_index
5611 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5612 logical_id=(vgname, names[idx]),
5613 iv_name="disk/%d" % disk_index,
5615 disks.append(disk_dev)
5616 elif template_name == constants.DT_DRBD8:
5617 if len(secondary_nodes) != 1:
5618 raise errors.ProgrammerError("Wrong template configuration")
5619 remote_node = secondary_nodes[0]
5620 minors = lu.cfg.AllocateDRBDMinor(
5621 [primary_node, remote_node] * len(disk_info), instance_name)
5623     names = []
5624     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5625 for i in range(disk_count)]):
5626 names.append(lv_prefix + "_data")
5627 names.append(lv_prefix + "_meta")
5628 for idx, disk in enumerate(disk_info):
5629 disk_index = idx + base_index
5630 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5631 disk["size"], names[idx*2:idx*2+2],
5632 "disk/%d" % disk_index,
5633 minors[idx*2], minors[idx*2+1])
5634 disk_dev.mode = disk["mode"]
5635 disks.append(disk_dev)
5636 elif template_name == constants.DT_FILE:
5637 if len(secondary_nodes) != 0:
5638 raise errors.ProgrammerError("Wrong template configuration")
5640 for idx, disk in enumerate(disk_info):
5641 disk_index = idx + base_index
5642 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5643 iv_name="disk/%d" % disk_index,
5644 logical_id=(file_driver,
5645 "%s/disk%d" % (file_storage_dir,
5648 disks.append(disk_dev)
5650 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5654 def _GetInstanceInfoText(instance):
5655 """Compute that text that should be added to the disk's metadata.
5658 return "originstname+%s" % instance.name
5661 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5662 """Create all disks for an instance.
5664 This abstracts away some work from AddInstance.
5666 @type lu: L{LogicalUnit}
5667 @param lu: the logical unit on whose behalf we execute
5668 @type instance: L{objects.Instance}
5669 @param instance: the instance whose disks we should create
5670   @type to_skip: list
5671   @param to_skip: list of indices to skip
5672 @type target_node: string
5673 @param target_node: if passed, overrides the target node for creation
5675 @return: the success of the creation
5677   """
5678   info = _GetInstanceInfoText(instance)
5679 if target_node is None:
5680 pnode = instance.primary_node
5681     all_nodes = instance.all_nodes
5682   else:
5683     pnode = target_node
5684     all_nodes = [pnode]
5686 if instance.disk_template == constants.DT_FILE:
5687 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5688 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5690 result.Raise("Failed to create directory '%s' on"
5691 " node %s" % (file_storage_dir, pnode))
5693 # Note: this needs to be kept in sync with adding of disks in
5694 # LUSetInstanceParams
5695 for idx, device in enumerate(instance.disks):
5696     if to_skip and idx in to_skip:
5697       continue
5698 logging.info("Creating volume %s for instance %s",
5699 device.iv_name, instance.name)
5701 for node in all_nodes:
5702 f_create = node == pnode
5703 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5706 def _RemoveDisks(lu, instance, target_node=None):
5707 """Remove all disks for an instance.
5709 This abstracts away some work from `AddInstance()` and
5710 `RemoveInstance()`. Note that in case some of the devices couldn't
5711 be removed, the removal will continue with the other ones (compare
5712 with `_CreateDisks()`).
5714 @type lu: L{LogicalUnit}
5715 @param lu: the logical unit on whose behalf we execute
5716 @type instance: L{objects.Instance}
5717 @param instance: the instance whose disks we should remove
5718 @type target_node: string
5719 @param target_node: used to override the node on which to remove the disks
5721 @return: the success of the removal
5724 logging.info("Removing block devices for instance %s", instance.name)
5726   all_result = True
5727   for device in instance.disks:
5728     if target_node:
5729       edata = [(target_node, device)]
5730     else:
5731       edata = device.ComputeNodeTree(instance.primary_node)
5732 for node, disk in edata:
5733 lu.cfg.SetDiskID(disk, node)
5734 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5736 lu.LogWarning("Could not remove block device %s on node %s,"
5737 " continuing anyway: %s", device.iv_name, node, msg)
5740 if instance.disk_template == constants.DT_FILE:
5741 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5742     if target_node:
5743       tgt = target_node
5744     else:
5745       tgt = instance.primary_node
5746     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5747     if result.fail_msg:
5748       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5749                     file_storage_dir, instance.primary_node, result.fail_msg)
5750       all_result = False
5752   return all_result
5755 def _ComputeDiskSize(disk_template, disks):
5756 """Compute disk size requirements in the volume group
5759 # Required free disk space as a function of disk and swap space
5761 constants.DT_DISKLESS: None,
5762 constants.DT_PLAIN: sum(d["size"] for d in disks),
5763 # 128 MB are added for drbd metadata for each disk
5764 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5765     constants.DT_FILE: None,
5766   }
5768 if disk_template not in req_size_dict:
5769 raise errors.ProgrammerError("Disk template '%s' size requirement"
5770 " is unknown" % disk_template)
5772 return req_size_dict[disk_template]
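# Worked example: for DT_DRBD8 with disks of 10240 and 5120 MiB, the VG
# requirement is (10240 + 128) + (5120 + 128) = 15616 MiB, while
# DT_DISKLESS and DT_FILE need no VG space and map to None.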
5775 def _CheckHVParams(lu, nodenames, hvname, hvparams):
5776 """Hypervisor parameter validation.
5778   This function abstracts the hypervisor parameter validation to be
5779 used in both instance create and instance modify.
5781 @type lu: L{LogicalUnit}
5782 @param lu: the logical unit for which we check
5783 @type nodenames: list
5784 @param nodenames: the list of nodes on which we should check
5785 @type hvname: string
5786 @param hvname: the name of the hypervisor we should use
5787 @type hvparams: dict
5788 @param hvparams: the parameters which we need to check
5789 @raise errors.OpPrereqError: if the parameters are not valid
5791 """
5792 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5793 hvname,
5794 hvparams)
5795 for node in nodenames:
5796 info = hvinfo[node]
5797 if info.offline:
5798 continue
5799 info.Raise("Hypervisor parameter validation failed on node %s" % node)
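# Usage sketch (hypothetical node names): instance create validates its
# hypervisor parameters on the primary and secondary nodes along the lines of
#
#   _CheckHVParams(self, ["node1.example.com", "node2.example.com"],
#                  self.op.hypervisor, self.op.hvparams)
#
# where offline nodes are skipped rather than treated as validation failures.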
5802 class LUCreateInstance(LogicalUnit):
5803 """Create an instance.
5806 HPATH = "instance-add"
5807 HTYPE = constants.HTYPE_INSTANCE
5808 _OP_REQP = ["instance_name", "disks", "disk_template",
5810 "wait_for_sync", "ip_check", "nics",
5811 "hvparams", "beparams"]
5814 def CheckArguments(self):
5818 # set optional parameters to none if they don't exist
5819 for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5820 if not hasattr(self.op, attr):
5821 setattr(self.op, attr, None)
5823 # do not require name_check to ease forward/backward compatibility
5825 if not hasattr(self.op, "name_check"):
5826 self.op.name_check = True
5827 # validate/normalize the instance name
5828 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
5829 if self.op.ip_check and not self.op.name_check:
5830 # TODO: make the ip check more flexible and not depend on the name check
5831 raise errors.OpPrereqError("Cannot do ip checks without a name check",
5833 if (self.op.disk_template == constants.DT_FILE and
5834 not constants.ENABLE_FILE_STORAGE):
5835 raise errors.OpPrereqError("File storage disabled at configure time",
5837 # check disk information: either all adopt, or no adopt
5838 has_adopt = has_no_adopt = False
5839 for disk in self.op.disks:
5840 if "adopt" in disk:
5841 has_adopt = True
5842 else:
5843 has_no_adopt = True
5844 if has_adopt and has_no_adopt:
5845 raise errors.OpPrereqError("Either all disks have are adoped or none is",
5848 if self.op.disk_template != constants.DT_PLAIN:
5849 raise errors.OpPrereqError("Disk adoption is only supported for the"
5850 " 'plain' disk template",
5852 if self.op.iallocator is not None:
5853 raise errors.OpPrereqError("Disk adoption not allowed with an"
5854 " iallocator script", errors.ECODE_INVAL)
5855 if self.op.mode == constants.INSTANCE_IMPORT:
5856 raise errors.OpPrereqError("Disk adoption not allowed for"
5857 " instance import", errors.ECODE_INVAL)
5859 self.adopt_disks = has_adopt
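# For illustration (hypothetical opcode input): a request with
#   disks=[{"adopt": "data-lv1"}, {"adopt": "data-lv2"}]
# switches the LU into adoption mode, while a mixed list such as
#   disks=[{"adopt": "data-lv1"}, {"size": 1024}]
# is rejected by the all-or-nothing check above.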
5861 def ExpandNames(self):
5862 """ExpandNames for CreateInstance.
5864 Figure out the right locks for instance creation.
5866 """
5867 self.needed_locks = {}
5869 # cheap checks, mostly valid constants given
5871 # verify creation mode
5872 if self.op.mode not in (constants.INSTANCE_CREATE,
5873 constants.INSTANCE_IMPORT):
5874 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5875 self.op.mode, errors.ECODE_INVAL)
5877 # disk template and mirror node verification
5878 _CheckDiskTemplate(self.op.disk_template)
5880 if self.op.hypervisor is None:
5881 self.op.hypervisor = self.cfg.GetHypervisorType()
5883 cluster = self.cfg.GetClusterInfo()
5884 enabled_hvs = cluster.enabled_hypervisors
5885 if self.op.hypervisor not in enabled_hvs:
5886 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5887 " cluster (%s)" % (self.op.hypervisor,
5888 ",".join(enabled_hvs)),
5891 # check hypervisor parameter syntax (locally)
5892 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5893 filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5894 self.op.hvparams)
5895 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5896 hv_type.CheckParameterSyntax(filled_hvp)
5897 self.hv_full = filled_hvp
5898 # check that we don't specify global parameters on an instance
5899 _CheckGlobalHvParams(self.op.hvparams)
5901 # fill and remember the beparams dict
5902 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5903 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5904 self.op.beparams)
5906 #### instance parameters check
5908 # instance name verification
5909 if self.op.name_check:
5910 hostname1 = utils.GetHostInfo(self.op.instance_name)
5911 self.op.instance_name = instance_name = hostname1.name
5912 # used in CheckPrereq for ip ping check
5913 self.check_ip = hostname1.ip
5914 else:
5915 instance_name = self.op.instance_name
5916 self.check_ip = None
5918 # this is just a preventive check, but someone might still add this
5919 # instance in the meantime, and creation will fail at lock-add time
5920 if instance_name in self.cfg.GetInstanceList():
5921 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5922 instance_name, errors.ECODE_EXISTS)
5924 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5926 # NIC buildup
5927 self.nics = []
5928 for idx, nic in enumerate(self.op.nics):
5929 nic_mode_req = nic.get("mode", None)
5930 nic_mode = nic_mode_req
5931 if nic_mode is None:
5932 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5934 # in routed mode, for the first nic, the default ip is 'auto'
5935 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5936 default_ip_mode = constants.VALUE_AUTO
5937 else:
5938 default_ip_mode = constants.VALUE_NONE
5940 # ip validity checks
5941 ip = nic.get("ip", default_ip_mode)
5942 if ip is None or ip.lower() == constants.VALUE_NONE:
5943 nic_ip = None
5944 elif ip.lower() == constants.VALUE_AUTO:
5945 if not self.op.name_check:
5946 raise errors.OpPrereqError("IP address set to auto but name checks"
5947 " have been skipped. Aborting.",
5948 errors.ECODE_INVAL)
5949 nic_ip = hostname1.ip
5950 else:
5951 if not utils.IsValidIP(ip):
5952 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5953 " like a valid IP" % ip,
5954 errors.ECODE_INVAL)
5955 nic_ip = ip
5957 # TODO: check the ip address for uniqueness
5958 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5959 raise errors.OpPrereqError("Routed nic mode requires an ip address",
5962 # MAC address verification
5963 mac = nic.get("mac", constants.VALUE_AUTO)
5964 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5965 mac = utils.NormalizeAndValidateMac(mac)
5967 try:
5968 self.cfg.ReserveMAC(mac, self.proc.GetECId())
5969 except errors.ReservationError:
5970 raise errors.OpPrereqError("MAC address %s already in use"
5971 " in cluster" % mac,
5972 errors.ECODE_NOTUNIQUE)
5974 # bridge verification
5975 bridge = nic.get("bridge", None)
5976 link = nic.get("link", None)
5977 if bridge and link:
5978 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5979 " at the same time", errors.ECODE_INVAL)
5980 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5981 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
5982 errors.ECODE_INVAL)
5983 elif bridge:
5984 link = bridge
5986 nicparams = {}
5987 if nic_mode_req:
5988 nicparams[constants.NIC_MODE] = nic_mode_req
5989 if link:
5990 nicparams[constants.NIC_LINK] = link
5992 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
5993 nicparams)
5994 objects.NIC.CheckParameterSyntax(check_params)
5995 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
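# Illustrative opcode input (hypothetical values): the loop above accepts,
# for example,
#   nics=[{"mode": "bridged", "link": "xen-br0", "mac": "auto"},
#         {"mode": "routed", "ip": "198.51.100.10"}]
# where the first NIC gets a generated MAC and no IP, and the second one is
# routed and therefore must carry an IP address.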
5997 # disk checks/pre-build
5998 self.disks = []
5999 for disk in self.op.disks:
6000 mode = disk.get("mode", constants.DISK_RDWR)
6001 if mode not in constants.DISK_ACCESS_SET:
6002 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6003 mode, errors.ECODE_INVAL)
6004 size = disk.get("size", None)
6005 if size is None:
6006 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6007 try:
6008 size = int(size)
6009 except (TypeError, ValueError):
6010 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6011 errors.ECODE_INVAL)
6012 new_disk = {"size": size, "mode": mode}
6013 if "adopt" in disk:
6014 new_disk["adopt"] = disk["adopt"]
6015 self.disks.append(new_disk)
6017 # file storage checks
6018 if (self.op.file_driver and
6019 not self.op.file_driver in constants.FILE_DRIVER):
6020 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6021 self.op.file_driver, errors.ECODE_INVAL)
6023 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6024 raise errors.OpPrereqError("File storage directory path not absolute",
6027 ### Node/iallocator related checks
6028 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6029 raise errors.OpPrereqError("One and only one of iallocator and primary"
6030 " node must be given",
6033 if self.op.iallocator:
6034 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6035 else:
6036 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6037 nodelist = [self.op.pnode]
6038 if self.op.snode is not None:
6039 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6040 nodelist.append(self.op.snode)
6041 self.needed_locks[locking.LEVEL_NODE] = nodelist
6043 # in case of import lock the source node too
6044 if self.op.mode == constants.INSTANCE_IMPORT:
6045 src_node = getattr(self.op, "src_node", None)
6046 src_path = getattr(self.op, "src_path", None)
6048 if src_path is None:
6049 self.op.src_path = src_path = self.op.instance_name
6051 if src_node is None:
6052 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6053 self.op.src_node = None
6054 if os.path.isabs(src_path):
6055 raise errors.OpPrereqError("Importing an instance from an absolute"
6056 " path requires a source node option.",
6059 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6060 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6061 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6062 if not os.path.isabs(src_path):
6063 self.op.src_path = src_path = \
6064 utils.PathJoin(constants.EXPORT_DIR, src_path)
6066 # On import force_variant must be True, because if we forced it at
6067 # initial install, our only chance when importing it back is that it
6068 # works again!
6069 self.op.force_variant = True
6071 else: # INSTANCE_CREATE
6072 if getattr(self.op, "os_type", None) is None:
6073 raise errors.OpPrereqError("No guest OS specified",
6075 self.op.force_variant = getattr(self.op, "force_variant", False)
6077 def _RunAllocator(self):
6078 """Run the allocator based on input opcode.
6081 nics = [n.ToDict() for n in self.nics]
6082 ial = IAllocator(self.cfg, self.rpc,
6083 mode=constants.IALLOCATOR_MODE_ALLOC,
6084 name=self.op.instance_name,
6085 disk_template=self.op.disk_template,
6086 tags=[],
6087 os=self.op.os_type,
6088 vcpus=self.be_full[constants.BE_VCPUS],
6089 mem_size=self.be_full[constants.BE_MEMORY],
6090 disks=self.disks,
6091 nics=nics,
6092 hypervisor=self.op.hypervisor,
6093 )
6095 ial.Run(self.op.iallocator)
6097 if not ial.success:
6098 raise errors.OpPrereqError("Can't compute nodes using"
6099 " iallocator '%s': %s" %
6100 (self.op.iallocator, ial.info),
6101 errors.ECODE_NORES)
6102 if len(ial.result) != ial.required_nodes:
6103 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6104 " of nodes (%s), required %s" %
6105 (self.op.iallocator, len(ial.result),
6106 ial.required_nodes), errors.ECODE_FAULT)
6107 self.op.pnode = ial.result[0]
6108 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6109 self.op.instance_name, self.op.iallocator,
6110 utils.CommaJoin(ial.result))
6111 if ial.required_nodes == 2:
6112 self.op.snode = ial.result[1]
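# For a mirrored (DRBD8) template the allocator must return two nodes, so a
# successful run might yield (hypothetical names):
#   ial.result == ["node1.example.com", "node2.example.com"]
# with ial.result[0] becoming the primary and ial.result[1] the secondary.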
6114 def BuildHooksEnv(self):
6115 """Build hooks env.
6117 This runs on master, primary and secondary nodes of the instance.
6119 """
6120 env = {
6121 "ADD_MODE": self.op.mode,
6122 }
6123 if self.op.mode == constants.INSTANCE_IMPORT:
6124 env["SRC_NODE"] = self.op.src_node
6125 env["SRC_PATH"] = self.op.src_path
6126 env["SRC_IMAGES"] = self.src_images
6128 env.update(_BuildInstanceHookEnv(
6129 name=self.op.instance_name,
6130 primary_node=self.op.pnode,
6131 secondary_nodes=self.secondaries,
6132 status=self.op.start,
6133 os_type=self.op.os_type,
6134 memory=self.be_full[constants.BE_MEMORY],
6135 vcpus=self.be_full[constants.BE_VCPUS],
6136 nics=_NICListToTuple(self, self.nics),
6137 disk_template=self.op.disk_template,
6138 disks=[(d["size"], d["mode"]) for d in self.disks],
6139 bep=self.be_full,
6140 hvp=self.hv_full,
6141 hypervisor_name=self.op.hypervisor,
6142 ))
6144 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6145 self.secondaries)
6146 return env, nl, nl
6148 def CheckPrereq(self):
6149 """Check prerequisites.
6152 if (not self.cfg.GetVGName() and
6153 self.op.disk_template not in constants.DTS_NOT_LVM):
6154 raise errors.OpPrereqError("Cluster does not support lvm-based"
6155 " instances", errors.ECODE_STATE)
6157 if self.op.mode == constants.INSTANCE_IMPORT:
6158 src_node = self.op.src_node
6159 src_path = self.op.src_path
6161 if src_node is None:
6162 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6163 exp_list = self.rpc.call_export_list(locked_nodes)
6164 found = False
6165 for node in exp_list:
6166 if exp_list[node].fail_msg:
6167 continue
6168 if src_path in exp_list[node].payload:
6169 found = True
6170 self.op.src_node = src_node = node
6171 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6172 src_path)
6173 break
6174 if not found:
6175 raise errors.OpPrereqError("No export found for relative path %s" %
6176 src_path, errors.ECODE_INVAL)
6178 _CheckNodeOnline(self, src_node)
6179 result = self.rpc.call_export_info(src_node, src_path)
6180 result.Raise("No export or invalid export found in dir %s" % src_path)
6182 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6183 if not export_info.has_section(constants.INISECT_EXP):
6184 raise errors.ProgrammerError("Corrupted export config",
6185 errors.ECODE_ENVIRON)
6187 ei_version = export_info.get(constants.INISECT_EXP, 'version')
6188 if (int(ei_version) != constants.EXPORT_VERSION):
6189 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6190 (ei_version, constants.EXPORT_VERSION),
6191 errors.ECODE_ENVIRON)
6193 # Check that the new instance doesn't have less disks than the export
6194 instance_disks = len(self.disks)
6195 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6196 if instance_disks < export_disks:
6197 raise errors.OpPrereqError("Not enough disks to import."
6198 " (instance: %d, export: %d)" %
6199 (instance_disks, export_disks),
6200 errors.ECODE_INVAL)
6202 self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
6203 disk_images = []
6204 for idx in range(export_disks):
6205 option = 'disk%d_dump' % idx
6206 if export_info.has_option(constants.INISECT_INS, option):
6207 # FIXME: are the old os-es, disk sizes, etc. useful?
6208 export_name = export_info.get(constants.INISECT_INS, option)
6209 image = utils.PathJoin(src_path, export_name)
6210 disk_images.append(image)
6211 else:
6212 disk_images.append(False)
6214 self.src_images = disk_images
6216 old_name = export_info.get(constants.INISECT_INS, 'name')
6217 # FIXME: int() here could throw a ValueError on broken exports
6218 exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
6219 if self.op.instance_name == old_name:
6220 for idx, nic in enumerate(self.nics):
6221 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6222 nic_mac_ini = 'nic%d_mac' % idx
6223 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6225 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6227 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6228 if self.op.ip_check:
6229 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6230 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6231 (self.check_ip, self.op.instance_name),
6232 errors.ECODE_NOTUNIQUE)
6234 #### mac address generation
6235 # By generating here the mac address both the allocator and the hooks get
6236 # the real final mac address rather than the 'auto' or 'generate' value.
6237 # There is a race condition between the generation and the instance object
6238 # creation, which means that we know the mac is valid now, but we're not
6239 # sure it will be when we actually add the instance. If things go bad
6240 # adding the instance will abort because of a duplicate mac, and the
6241 # creation job will fail.
6242 for nic in self.nics:
6243 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6244 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6248 if self.op.iallocator is not None:
6249 self._RunAllocator()
6251 #### node related checks
6253 # check primary node
6254 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6255 assert self.pnode is not None, \
6256 "Cannot retrieve locked node %s" % self.op.pnode
6258 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6259 pnode.name, errors.ECODE_STATE)
6261 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6262 pnode.name, errors.ECODE_STATE)
6264 self.secondaries = []
6266 # mirror node verification
6267 if self.op.disk_template in constants.DTS_NET_MIRROR:
6268 if self.op.snode is None:
6269 raise errors.OpPrereqError("The networked disk templates need"
6270 " a mirror node", errors.ECODE_INVAL)
6271 if self.op.snode == pnode.name:
6272 raise errors.OpPrereqError("The secondary node cannot be the"
6273 " primary node.", errors.ECODE_INVAL)
6274 _CheckNodeOnline(self, self.op.snode)
6275 _CheckNodeNotDrained(self, self.op.snode)
6276 self.secondaries.append(self.op.snode)
6278 nodenames = [pnode.name] + self.secondaries
6280 req_size = _ComputeDiskSize(self.op.disk_template,
6281 self.disks)
6283 # Check lv size requirements, if not adopting
6284 if req_size is not None and not self.adopt_disks:
6285 _CheckNodesFreeDisk(self, nodenames, req_size)
6287 if self.adopt_disks: # instead, we must check the adoption data
6288 all_lvs = set([i["adopt"] for i in self.disks])
6289 if len(all_lvs) != len(self.disks):
6290 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6292 for lv_name in all_lvs:
6294 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6295 except errors.ReservationError:
6296 raise errors.OpPrereqError("LV named %s used by another instance" %
6297 lv_name, errors.ECODE_NOTUNIQUE)
6299 node_lvs = self.rpc.call_lv_list([pnode.name],
6300 self.cfg.GetVGName())[pnode.name]
6301 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6302 node_lvs = node_lvs.payload
6303 delta = all_lvs.difference(node_lvs.keys())
6304 if delta:
6305 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6306 utils.CommaJoin(delta),
6307 errors.ECODE_INVAL)
6308 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6309 if online_lvs:
6310 raise errors.OpPrereqError("Online logical volumes found, cannot"
6311 " adopt: %s" % utils.CommaJoin(online_lvs),
6312 errors.ECODE_STATE)
6313 # update the size of disk based on what is found
6314 for dsk in self.disks:
6315 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6317 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6320 result = self.rpc.call_os_get(pnode.name, self.op.os_type)
6321 result.Raise("OS '%s' not in supported os list for primary node %s" %
6322 (self.op.os_type, pnode.name),
6323 prereq=True, ecode=errors.ECODE_INVAL)
6324 if not self.op.force_variant:
6325 _CheckOSVariant(result.payload, self.op.os_type)
6327 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6329 # memory check on primary node
6330 if self.op.start:
6331 _CheckNodeFreeMemory(self, self.pnode.name,
6332 "creating instance %s" % self.op.instance_name,
6333 self.be_full[constants.BE_MEMORY],
6334 self.op.hypervisor)
6336 self.dry_run_result = list(nodenames)
6338 def Exec(self, feedback_fn):
6339 """Create and add the instance to the cluster.
6342 instance = self.op.instance_name
6343 pnode_name = self.pnode.name
6345 ht_kind = self.op.hypervisor
6346 if ht_kind in constants.HTS_REQ_PORT:
6347 network_port = self.cfg.AllocatePort()
6348 else:
6349 network_port = None
6351 # this is needed because os.path.join does not accept None arguments
6352 if self.op.file_storage_dir is None:
6353 string_file_storage_dir = ""
6354 else:
6355 string_file_storage_dir = self.op.file_storage_dir
6357 # build the full file storage dir path
6358 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6359 string_file_storage_dir, instance)
6361 disks = _GenerateDiskTemplate(self,
6362 self.op.disk_template,
6363 instance, pnode_name,
6364 self.secondaries,
6365 self.disks,
6366 file_storage_dir,
6367 self.op.file_driver,
6368 0)
6370 iobj = objects.Instance(name=instance, os=self.op.os_type,
6371 primary_node=pnode_name,
6372 nics=self.nics, disks=disks,
6373 disk_template=self.op.disk_template,
6374 admin_up=False,
6375 network_port=network_port,
6376 beparams=self.op.beparams,
6377 hvparams=self.op.hvparams,
6378 hypervisor=self.op.hypervisor,
6379 )
6381 if self.adopt_disks:
6382 # rename LVs to the newly-generated names; we need to construct
6383 # 'fake' LV disks with the old data, plus the new unique_id
6384 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6385 rename_to = []
6386 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
6387 rename_to.append(t_dsk.logical_id)
6388 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6389 self.cfg.SetDiskID(t_dsk, pnode_name)
6390 result = self.rpc.call_blockdev_rename(pnode_name,
6391 zip(tmp_disks, rename_to))
6392 result.Raise("Failed to rename adoped LVs")
6394 feedback_fn("* creating instance disks...")
6396 _CreateDisks(self, iobj)
6397 except errors.OpExecError:
6398 self.LogWarning("Device creation failed, reverting...")
6400 _RemoveDisks(self, iobj)
6402 self.cfg.ReleaseDRBDMinors(instance)
6405 feedback_fn("adding instance %s to cluster config" % instance)
6407 self.cfg.AddInstance(iobj, self.proc.GetECId())
6409 # Declare that we don't want to remove the instance lock anymore, as we've
6410 # added the instance to the config
6411 del self.remove_locks[locking.LEVEL_INSTANCE]
6412 # Unlock all the nodes
6413 if self.op.mode == constants.INSTANCE_IMPORT:
6414 nodes_keep = [self.op.src_node]
6415 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6416 if node != self.op.src_node]
6417 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6418 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6419 else:
6420 self.context.glm.release(locking.LEVEL_NODE)
6421 del self.acquired_locks[locking.LEVEL_NODE]
6423 if self.op.wait_for_sync:
6424 disk_abort = not _WaitForSync(self, iobj)
6425 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6426 # make sure the disks are not degraded (still sync-ing is ok)
6428 feedback_fn("* checking mirrors status")
6429 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6434 _RemoveDisks(self, iobj)
6435 self.cfg.RemoveInstance(iobj.name)
6436 # Make sure the instance lock gets removed
6437 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6438 raise errors.OpExecError("There are some degraded disks for"
6441 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6442 if self.op.mode == constants.INSTANCE_CREATE:
6443 feedback_fn("* running the instance OS create scripts...")
6444 # FIXME: pass debug option from opcode to backend
6445 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6446 self.op.debug_level)
6447 result.Raise("Could not add os for instance %s"
6448 " on node %s" % (instance, pnode_name))
6450 elif self.op.mode == constants.INSTANCE_IMPORT:
6451 feedback_fn("* running the instance OS import scripts...")
6452 src_node = self.op.src_node
6453 src_images = self.src_images
6454 cluster_name = self.cfg.GetClusterName()
6455 # FIXME: pass debug option from opcode to backend
6456 import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6457 src_node, src_images,
6458 cluster_name,
6459 self.op.debug_level)
6460 msg = import_result.fail_msg
6461 if msg:
6462 self.LogWarning("Error while importing the disk images for instance"
6463 " %s on node %s: %s" % (instance, pnode_name, msg))
6464 else:
6465 # also checked in the prereq part
6466 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6467 % self.op.mode)
6469 if self.op.start:
6470 iobj.admin_up = True
6471 self.cfg.Update(iobj, feedback_fn)
6472 logging.info("Starting instance %s on node %s", instance, pnode_name)
6473 feedback_fn("* starting instance...")
6474 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6475 result.Raise("Could not start instance")
6477 return list(iobj.all_nodes)
6480 class LUConnectConsole(NoHooksLU):
6481 """Connect to an instance's console.
6483 This is somewhat special in that it returns the command line that
6484 you need to run on the master node in order to connect to the
6488 _OP_REQP = ["instance_name"]
6491 def ExpandNames(self):
6492 self._ExpandAndLockInstance()
6494 def CheckPrereq(self):
6495 """Check prerequisites.
6497 This checks that the instance is in the cluster.
6499 """
6500 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6501 assert self.instance is not None, \
6502 "Cannot retrieve locked instance %s" % self.op.instance_name
6503 _CheckNodeOnline(self, self.instance.primary_node)
6505 def Exec(self, feedback_fn):
6506 """Connect to the console of an instance
6509 instance = self.instance
6510 node = instance.primary_node
6512 node_insts = self.rpc.call_instance_list([node],
6513 [instance.hypervisor])[node]
6514 node_insts.Raise("Can't get node information from %s" % node)
6516 if instance.name not in node_insts.payload:
6517 raise errors.OpExecError("Instance %s is not running." % instance.name)
6519 logging.debug("Connecting to console of %s on %s", instance.name, node)
6521 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6522 cluster = self.cfg.GetClusterInfo()
6523 # beparams and hvparams are passed separately, to avoid editing the
6524 # instance and then saving the defaults in the instance itself.
6525 hvparams = cluster.FillHV(instance)
6526 beparams = cluster.FillBE(instance)
6527 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6530 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
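# The return value is the SSH command line to run on the master node; for a
# Xen instance it would look roughly like (hypothetical values):
#   ssh -t ... root@node1.example.com "xm console instance1.example.com"
# i.e. the hypervisor-specific console command wrapped in an interactive
# ssh invocation.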
6533 class LUReplaceDisks(LogicalUnit):
6534 """Replace the disks of an instance.
6537 HPATH = "mirrors-replace"
6538 HTYPE = constants.HTYPE_INSTANCE
6539 _OP_REQP = ["instance_name", "mode", "disks"]
6542 def CheckArguments(self):
6543 if not hasattr(self.op, "remote_node"):
6544 self.op.remote_node = None
6545 if not hasattr(self.op, "iallocator"):
6546 self.op.iallocator = None
6547 if not hasattr(self.op, "early_release"):
6548 self.op.early_release = False
6550 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6551 self.op.iallocator)
6553 def ExpandNames(self):
6554 self._ExpandAndLockInstance()
6556 if self.op.iallocator is not None:
6557 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6559 elif self.op.remote_node is not None:
6560 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6561 self.op.remote_node = remote_node
6563 # Warning: do not remove the locking of the new secondary here
6564 # unless DRBD8.AddChildren is changed to work in parallel;
6565 # currently it doesn't since parallel invocations of
6566 # FindUnusedMinor will conflict
6567 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6568 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6570 else:
6571 self.needed_locks[locking.LEVEL_NODE] = []
6572 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6574 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6575 self.op.iallocator, self.op.remote_node,
6576 self.op.disks, False, self.op.early_release)
6578 self.tasklets = [self.replacer]
6580 def DeclareLocks(self, level):
6581 # If we're not already locking all nodes in the set we have to declare the
6582 # instance's primary/secondary nodes.
6583 if (level == locking.LEVEL_NODE and
6584 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6585 self._LockInstancesNodes()
6587 def BuildHooksEnv(self):
6588 """Build hooks env.
6590 This runs on the master, the primary and all the secondaries.
6592 """
6593 instance = self.replacer.instance
6594 env = {
6595 "MODE": self.op.mode,
6596 "NEW_SECONDARY": self.op.remote_node,
6597 "OLD_SECONDARY": instance.secondary_nodes[0],
6598 }
6599 env.update(_BuildInstanceHookEnvByObject(self, instance))
6600 nl = [
6601 self.cfg.GetMasterNode(),
6602 instance.primary_node,
6603 ]
6604 if self.op.remote_node is not None:
6605 nl.append(self.op.remote_node)
6606 return env, nl, nl
6609 class LUEvacuateNode(LogicalUnit):
6610 """Relocate the secondary instances from a node.
6613 HPATH = "node-evacuate"
6614 HTYPE = constants.HTYPE_NODE
6615 _OP_REQP = ["node_name"]
6618 def CheckArguments(self):
6619 if not hasattr(self.op, "remote_node"):
6620 self.op.remote_node = None
6621 if not hasattr(self.op, "iallocator"):
6622 self.op.iallocator = None
6623 if not hasattr(self.op, "early_release"):
6624 self.op.early_release = False
6626 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6627 self.op.remote_node,
6628 self.op.iallocator)
6630 def ExpandNames(self):
6631 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6633 self.needed_locks = {}
6635 # Declare node locks
6636 if self.op.iallocator is not None:
6637 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6639 elif self.op.remote_node is not None:
6640 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6642 # Warning: do not remove the locking of the new secondary here
6643 # unless DRBD8.AddChildren is changed to work in parallel;
6644 # currently it doesn't since parallel invocations of
6645 # FindUnusedMinor will conflict
6646 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
6647 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6650 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
6652 # Create tasklets for replacing disks for all secondary instances on this
6653 # node
6654 names = []
6655 tasklets = []
6657 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6658 logging.debug("Replacing disks for instance %s", inst.name)
6659 names.append(inst.name)
6661 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6662 self.op.iallocator, self.op.remote_node, [],
6663 True, self.op.early_release)
6664 tasklets.append(replacer)
6666 self.tasklets = tasklets
6667 self.instance_names = names
6669 # Declare instance locks
6670 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
6672 def DeclareLocks(self, level):
6673 # If we're not already locking all nodes in the set we have to declare the
6674 # instance's primary/secondary nodes.
6675 if (level == locking.LEVEL_NODE and
6676 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6677 self._LockInstancesNodes()
6679 def BuildHooksEnv(self):
6680 """Build hooks env.
6682 This runs on the master, the primary and all the secondaries.
6684 """
6685 env = {
6686 "NODE_NAME": self.op.node_name,
6687 }
6689 nl = [self.cfg.GetMasterNode()]
6691 if self.op.remote_node is not None:
6692 env["NEW_SECONDARY"] = self.op.remote_node
6693 nl.append(self.op.remote_node)
6695 return (env, nl, nl)
6698 class TLReplaceDisks(Tasklet):
6699 """Replaces disks for an instance.
6701 Note: Locking is not within the scope of this class.
6703 """
6704 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6705 disks, delay_iallocator, early_release):
6706 """Initializes this class.
6709 Tasklet.__init__(self, lu)
6711 # Parameters
6712 self.instance_name = instance_name
6713 self.mode = mode
6714 self.iallocator_name = iallocator_name
6715 self.remote_node = remote_node
6716 self.disks = disks
6717 self.delay_iallocator = delay_iallocator
6718 self.early_release = early_release
6720 # Runtime data
6721 self.instance = None
6722 self.new_node = None
6723 self.target_node = None
6724 self.other_node = None
6725 self.remote_node_info = None
6726 self.node_secondary_ip = None
6728 @staticmethod
6729 def CheckArguments(mode, remote_node, iallocator):
6730 """Helper function for users of this class.
6733 # check for valid parameter combination
6734 if mode == constants.REPLACE_DISK_CHG:
6735 if remote_node is None and iallocator is None:
6736 raise errors.OpPrereqError("When changing the secondary either an"
6737 " iallocator script must be used or the"
6738 " new node given", errors.ECODE_INVAL)
6740 if remote_node is not None and iallocator is not None:
6741 raise errors.OpPrereqError("Give either the iallocator or the new"
6742 " secondary, not both", errors.ECODE_INVAL)
6744 elif remote_node is not None or iallocator is not None:
6745 # Not replacing the secondary
6746 raise errors.OpPrereqError("The iallocator and new node options can"
6747 " only be used when changing the"
6748 " secondary node", errors.ECODE_INVAL)
6750 @staticmethod
6751 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6752 """Compute a new secondary node using an IAllocator.
6755 ial = IAllocator(lu.cfg, lu.rpc,
6756 mode=constants.IALLOCATOR_MODE_RELOC,
6757 name=instance_name,
6758 relocate_from=relocate_from)
6760 ial.Run(iallocator_name)
6762 if not ial.success:
6763 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6764 " %s" % (iallocator_name, ial.info),
6765 errors.ECODE_NORES)
6767 if len(ial.result) != ial.required_nodes:
6768 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6769 " of nodes (%s), required %s" %
6770 (iallocator_name,
6771 len(ial.result), ial.required_nodes),
6772 errors.ECODE_FAULT)
6774 remote_node_name = ial.result[0]
6776 lu.LogInfo("Selected new secondary for instance '%s': %s",
6777 instance_name, remote_node_name)
6779 return remote_node_name
6781 def _FindFaultyDisks(self, node_name):
6782 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
6783 node_name, True)
6785 def CheckPrereq(self):
6786 """Check prerequisites.
6788 This checks that the instance is in the cluster.
6790 """
6791 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
6792 assert instance is not None, \
6793 "Cannot retrieve locked instance %s" % self.instance_name
6795 if instance.disk_template != constants.DT_DRBD8:
6796 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
6797 " instances", errors.ECODE_INVAL)
6799 if len(instance.secondary_nodes) != 1:
6800 raise errors.OpPrereqError("The instance has a strange layout,"
6801 " expected one secondary but found %d" %
6802 len(instance.secondary_nodes),
6803 errors.ECODE_STATE)
6805 if not self.delay_iallocator:
6806 self._CheckPrereq2()
6808 def _CheckPrereq2(self):
6809 """Check prerequisites, second part.
6811 This function should always be part of CheckPrereq. It was separated and is
6812 now called from Exec because during node evacuation iallocator was only
6813 called with an unmodified cluster model, not taking planned changes into
6814 account.
6816 """
6817 instance = self.instance
6818 secondary_node = instance.secondary_nodes[0]
6820 if self.iallocator_name is None:
6821 remote_node = self.remote_node
6822 else:
6823 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
6824 instance.name, instance.secondary_nodes)
6826 if remote_node is not None:
6827 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
6828 assert self.remote_node_info is not None, \
6829 "Cannot retrieve locked node %s" % remote_node
6830 else:
6831 self.remote_node_info = None
6833 if remote_node == self.instance.primary_node:
6834 raise errors.OpPrereqError("The specified node is the primary node of"
6835 " the instance.", errors.ECODE_INVAL)
6837 if remote_node == secondary_node:
6838 raise errors.OpPrereqError("The specified node is already the"
6839 " secondary node of the instance.",
6842 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
6843 constants.REPLACE_DISK_CHG):
6844 raise errors.OpPrereqError("Cannot specify disks to be replaced",
6847 if self.mode == constants.REPLACE_DISK_AUTO:
6848 faulty_primary = self._FindFaultyDisks(instance.primary_node)
6849 faulty_secondary = self._FindFaultyDisks(secondary_node)
6851 if faulty_primary and faulty_secondary:
6852 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
6853 " one node and can not be repaired"
6854 " automatically" % self.instance_name,
6858 self.disks = faulty_primary
6859 self.target_node = instance.primary_node
6860 self.other_node = secondary_node
6861 check_nodes = [self.target_node, self.other_node]
6862 elif faulty_secondary:
6863 self.disks = faulty_secondary
6864 self.target_node = secondary_node
6865 self.other_node = instance.primary_node
6866 check_nodes = [self.target_node, self.other_node]
6867 else:
6868 self.disks = []
6869 check_nodes = []
6871 else:
6872 # Non-automatic modes
6873 if self.mode == constants.REPLACE_DISK_PRI:
6874 self.target_node = instance.primary_node
6875 self.other_node = secondary_node
6876 check_nodes = [self.target_node, self.other_node]
6878 elif self.mode == constants.REPLACE_DISK_SEC:
6879 self.target_node = secondary_node
6880 self.other_node = instance.primary_node
6881 check_nodes = [self.target_node, self.other_node]
6883 elif self.mode == constants.REPLACE_DISK_CHG:
6884 self.new_node = remote_node
6885 self.other_node = instance.primary_node
6886 self.target_node = secondary_node
6887 check_nodes = [self.new_node, self.other_node]
6889 _CheckNodeNotDrained(self.lu, remote_node)
6891 old_node_info = self.cfg.GetNodeInfo(secondary_node)
6892 assert old_node_info is not None
6893 if old_node_info.offline and not self.early_release:
6894 # doesn't make sense to delay the release
6895 self.early_release = True
6896 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
6897 " early-release mode", secondary_node)
6900 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
6903 # If not specified all disks should be replaced
6904 if not self.disks:
6905 self.disks = range(len(self.instance.disks))
6907 for node in check_nodes:
6908 _CheckNodeOnline(self.lu, node)
6910 # Check whether disks are valid
6911 for disk_idx in self.disks:
6912 instance.FindDisk(disk_idx)
6914 # Get secondary node IP addresses
6915 node_2nd_ip = {}
6917 for node_name in [self.target_node, self.other_node, self.new_node]:
6918 if node_name is not None:
6919 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
6921 self.node_secondary_ip = node_2nd_ip
6923 def Exec(self, feedback_fn):
6924 """Execute disk replacement.
6926 This dispatches the disk replacement to the appropriate handler.
6928 """
6929 if self.delay_iallocator:
6930 self._CheckPrereq2()
6933 feedback_fn("No disks need replacement")
6936 feedback_fn("Replacing disk(s) %s for %s" %
6937 (utils.CommaJoin(self.disks), self.instance.name))
6939 activate_disks = (not self.instance.admin_up)
6941 # Activate the instance disks if we're replacing them on a down instance
6942 if activate_disks:
6943 _StartInstanceDisks(self.lu, self.instance, True)
6945 try:
6946 # Should we replace the secondary node?
6947 if self.new_node is not None:
6948 fn = self._ExecDrbd8Secondary
6949 else:
6950 fn = self._ExecDrbd8DiskOnly
6952 return fn(feedback_fn)
6954 finally:
6955 # Deactivate the instance disks if we're replacing them on a
6956 # down instance
6957 if activate_disks:
6958 _SafeShutdownInstanceDisks(self.lu, self.instance)
6960 def _CheckVolumeGroup(self, nodes):
6961 self.lu.LogInfo("Checking volume groups")
6963 vgname = self.cfg.GetVGName()
6965 # Make sure volume group exists on all involved nodes
6966 results = self.rpc.call_vg_list(nodes)
6967 if not results:
6968 raise errors.OpExecError("Can't list volume groups on the nodes")
6970 for node in nodes:
6971 res = results[node]
6972 res.Raise("Error checking node %s" % node)
6973 if vgname not in res.payload:
6974 raise errors.OpExecError("Volume group '%s' not found on node %s" %
6975 (vgname, node))
6977 def _CheckDisksExistence(self, nodes):
6978 # Check disk existence
6979 for idx, dev in enumerate(self.instance.disks):
6980 if idx not in self.disks:
6981 continue
6983 for node in nodes:
6984 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
6985 self.cfg.SetDiskID(dev, node)
6987 result = self.rpc.call_blockdev_find(node, dev)
6989 msg = result.fail_msg
6990 if msg or not result.payload:
6991 if not msg:
6992 msg = "disk not found"
6993 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
6994 (idx, node, msg))
6996 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
6997 for idx, dev in enumerate(self.instance.disks):
6998 if idx not in self.disks:
6999 continue
7001 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7002 (idx, node_name))
7004 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7005 ldisk=ldisk):
7006 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7007 " replace disks for instance %s" %
7008 (node_name, self.instance.name))
7010 def _CreateNewStorage(self, node_name):
7011 vgname = self.cfg.GetVGName()
7013 iv_names = {}
7014 for idx, dev in enumerate(self.instance.disks):
7015 if idx not in self.disks:
7016 continue
7018 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7020 self.cfg.SetDiskID(dev, node_name)
7022 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7023 names = _GenerateUniqueNames(self.lu, lv_names)
7025 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7026 logical_id=(vgname, names[0]))
7027 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7028 logical_id=(vgname, names[1]))
7030 new_lvs = [lv_data, lv_meta]
7031 old_lvs = dev.children
7032 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7034 # we pass force_create=True to force the LVM creation
7035 for new_lv in new_lvs:
7036 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7037 _GetInstanceInfoText(self.instance), False)
7039 return iv_names
7041 def _CheckDevices(self, node_name, iv_names):
7042 for name, (dev, _, _) in iv_names.iteritems():
7043 self.cfg.SetDiskID(dev, node_name)
7045 result = self.rpc.call_blockdev_find(node_name, dev)
7047 msg = result.fail_msg
7048 if msg or not result.payload:
7049 if not msg:
7050 msg = "disk not found"
7051 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7052 (name, msg))
7054 if result.payload.is_degraded:
7055 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7057 def _RemoveOldStorage(self, node_name, iv_names):
7058 for name, (_, old_lvs, _) in iv_names.iteritems():
7059 self.lu.LogInfo("Remove logical volumes for %s" % name)
7062 self.cfg.SetDiskID(lv, node_name)
7064 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7066 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7067 hint="remove unused LVs manually")
7069 def _ReleaseNodeLock(self, node_name):
7070 """Releases the lock for a given node."""
7071 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7073 def _ExecDrbd8DiskOnly(self, feedback_fn):
7074 """Replace a disk on the primary or secondary for DRBD 8.
7076 The algorithm for replace is quite complicated:
7078 1. for each disk to be replaced:
7080 1. create new LVs on the target node with unique names
7081 1. detach old LVs from the drbd device
7082 1. rename old LVs to name_replaced.<time_t>
7083 1. rename new LVs to old LVs
7084 1. attach the new LVs (with the old names now) to the drbd device
7086 1. wait for sync across all devices
7088 1. for each modified disk:
7090 1. remove old LVs (which have the name name_replaces.<time_t>)
7092 Failures are not very well handled.
7094 """
7095 steps_total = 6
7097 # Step: check device activation
7098 self.lu.LogStep(1, steps_total, "Check device existence")
7099 self._CheckDisksExistence([self.other_node, self.target_node])
7100 self._CheckVolumeGroup([self.target_node, self.other_node])
7102 # Step: check other node consistency
7103 self.lu.LogStep(2, steps_total, "Check peer consistency")
7104 self._CheckDisksConsistency(self.other_node,
7105 self.other_node == self.instance.primary_node,
7106 False)
7108 # Step: create new storage
7109 self.lu.LogStep(3, steps_total, "Allocate new storage")
7110 iv_names = self._CreateNewStorage(self.target_node)
7112 # Step: for each lv, detach+rename*2+attach
7113 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7114 for dev, old_lvs, new_lvs in iv_names.itervalues():
7115 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7117 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7118 old_lvs)
7119 result.Raise("Can't detach drbd from local storage on node"
7120 " %s for device %s" % (self.target_node, dev.iv_name))
7122 #cfg.Update(instance)
7124 # ok, we created the new LVs, so now we know we have the needed
7125 # storage; as such, we proceed on the target node to rename
7126 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7127 # using the assumption that logical_id == physical_id (which in
7128 # turn is the unique_id on that node)
7130 # FIXME(iustin): use a better name for the replaced LVs
7131 temp_suffix = int(time.time())
7132 ren_fn = lambda d, suff: (d.physical_id[0],
7133 d.physical_id[1] + "_replaced-%s" % suff)
7135 # Build the rename list based on what LVs exist on the node
7136 rename_old_to_new = []
7137 for to_ren in old_lvs:
7138 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7139 if not result.fail_msg and result.payload:
7140 # device exists
7141 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7143 self.lu.LogInfo("Renaming the old LVs on the target node")
7144 result = self.rpc.call_blockdev_rename(self.target_node,
7145 rename_old_to_new)
7146 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7148 # Now we rename the new LVs to the old LVs
7149 self.lu.LogInfo("Renaming the new LVs on the target node")
7150 rename_new_to_old = [(new, old.physical_id)
7151 for old, new in zip(old_lvs, new_lvs)]
7152 result = self.rpc.call_blockdev_rename(self.target_node,
7153 rename_new_to_old)
7154 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7156 for old, new in zip(old_lvs, new_lvs):
7157 new.logical_id = old.logical_id
7158 self.cfg.SetDiskID(new, self.target_node)
7160 for disk in old_lvs:
7161 disk.logical_id = ren_fn(disk, temp_suffix)
7162 self.cfg.SetDiskID(disk, self.target_node)
7164 # Now that the new lvs have the old name, we can add them to the device
7165 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7166 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7167 new_lvs)
7168 msg = result.fail_msg
7169 if msg:
7170 for new_lv in new_lvs:
7171 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7172 new_lv).fail_msg
7173 if msg2:
7174 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7175 hint=("cleanup manually the unused logical"
7176 " volumes"))
7177 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7179 dev.children = new_lvs
7181 self.cfg.Update(self.instance, feedback_fn)
7183 cstep = 5
7184 if self.early_release:
7185 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7186 cstep += 1
7187 self._RemoveOldStorage(self.target_node, iv_names)
7188 # WARNING: we release both node locks here, do not do other RPCs
7189 # than WaitForSync to the primary node
7190 self._ReleaseNodeLock([self.target_node, self.other_node])
7193 # This can fail as the old devices are degraded and _WaitForSync
7194 # does a combined result over all disks, so we don't check its return value
7195 self.lu.LogStep(cstep, steps_total, "Sync devices")
7196 cstep += 1
7197 _WaitForSync(self.lu, self.instance)
7199 # Check all devices manually
7200 self._CheckDevices(self.instance.primary_node, iv_names)
7202 # Step: remove old storage
7203 if not self.early_release:
7204 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7205 cstep += 1
7206 self._RemoveOldStorage(self.target_node, iv_names)
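# To make the rename dance concrete (hypothetical volume names): for disk/0
# the attached data LV, say "<uuid>.disk0_data", is first renamed to
# "<uuid>.disk0_data_replaced-<time_t>" (the suffix comes from ren_fn above),
# the newly allocated LV is renamed to the old name and re-attached, and the
# "_replaced-<time_t>" volumes are only deleted after the mirror has synced.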
7208 def _ExecDrbd8Secondary(self, feedback_fn):
7209 """Replace the secondary node for DRBD 8.
7211 The algorithm for replace is quite complicated:
7212 - for all disks of the instance:
7213 - create new LVs on the new node with same names
7214 - shutdown the drbd device on the old secondary
7215 - disconnect the drbd network on the primary
7216 - create the drbd device on the new secondary
7217 - network attach the drbd on the primary, using an artifice:
7218 the drbd code for Attach() will connect to the network if it
7219 finds a device which is connected to the good local disks but
7220 not network enabled
7221 - wait for sync across all devices
7222 - remove all disks from the old secondary
7224 Failures are not very well handled.
7226 """
7227 steps_total = 6
7229 # Step: check device activation
7230 self.lu.LogStep(1, steps_total, "Check device existence")
7231 self._CheckDisksExistence([self.instance.primary_node])
7232 self._CheckVolumeGroup([self.instance.primary_node])
7234 # Step: check other node consistency
7235 self.lu.LogStep(2, steps_total, "Check peer consistency")
7236 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7238 # Step: create new storage
7239 self.lu.LogStep(3, steps_total, "Allocate new storage")
7240 for idx, dev in enumerate(self.instance.disks):
7241 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7242 (self.new_node, idx))
7243 # we pass force_create=True to force LVM creation
7244 for new_lv in dev.children:
7245 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7246 _GetInstanceInfoText(self.instance), False)
7248 # Step 4: dbrd minors and drbd setups changes
7249 # after this, we must manually remove the drbd minors on both the
7250 # error and the success paths
7251 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7252 minors = self.cfg.AllocateDRBDMinor([self.new_node
7253 for dev in self.instance.disks],
7254 self.instance.name)
7255 logging.debug("Allocated minors %r", minors)
7257 iv_names = {}
7258 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7259 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7260 (self.new_node, idx))
7261 # create new devices on new_node; note that we create two IDs:
7262 # one without port, so the drbd will be activated without
7263 # networking information on the new node at this stage, and one
7264 # with network, for the latter activation in step 4
7265 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7266 if self.instance.primary_node == o_node1:
7267 p_minor = o_minor1
7268 else:
7269 assert self.instance.primary_node == o_node2, "Three-node instance?"
7270 p_minor = o_minor2
7272 new_alone_id = (self.instance.primary_node, self.new_node, None,
7273 p_minor, new_minor, o_secret)
7274 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7275 p_minor, new_minor, o_secret)
7277 iv_names[idx] = (dev, dev.children, new_net_id)
7278 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7280 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7281 logical_id=new_alone_id,
7282 children=dev.children,
7285 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7286 _GetInstanceInfoText(self.instance), False)
7287 except errors.GenericError:
7288 self.cfg.ReleaseDRBDMinors(self.instance.name)
7289 raise
7291 # We have new devices, shutdown the drbd on the old secondary
7292 for idx, dev in enumerate(self.instance.disks):
7293 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7294 self.cfg.SetDiskID(dev, self.target_node)
7295 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7296 if msg:
7297 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7298 " node: %s" % (idx, msg),
7299 hint=("Please cleanup this device manually as"
7300 " soon as possible"))
7302 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7303 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7304 self.node_secondary_ip,
7305 self.instance.disks)\
7306 [self.instance.primary_node]
7308 msg = result.fail_msg
7309 if msg:
7310 # detaches didn't succeed (unlikely)
7311 self.cfg.ReleaseDRBDMinors(self.instance.name)
7312 raise errors.OpExecError("Can't detach the disks from the network on"
7313 " old node: %s" % (msg,))
7315 # if we managed to detach at least one, we update all the disks of
7316 # the instance to point to the new secondary
7317 self.lu.LogInfo("Updating instance configuration")
7318 for dev, _, new_logical_id in iv_names.itervalues():
7319 dev.logical_id = new_logical_id
7320 self.cfg.SetDiskID(dev, self.instance.primary_node)
7322 self.cfg.Update(self.instance, feedback_fn)
7324 # and now perform the drbd attach
7325 self.lu.LogInfo("Attaching primary drbds to new secondary"
7326 " (standalone => connected)")
7327 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7328 self.new_node],
7329 self.node_secondary_ip,
7330 self.instance.disks,
7331 self.instance.name,
7332 False)
7333 for to_node, to_result in result.items():
7334 msg = to_result.fail_msg
7335 if msg:
7336 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7337 to_node, msg,
7338 hint=("please do a gnt-instance info to see the"
7339 " status of disks"))
7340 cstep = 5
7341 if self.early_release:
7342 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7343 cstep += 1
7344 self._RemoveOldStorage(self.target_node, iv_names)
7345 # WARNING: we release all node locks here, do not do other RPCs
7346 # than WaitForSync to the primary node
7347 self._ReleaseNodeLock([self.instance.primary_node,
7348 self.target_node,
7349 self.new_node])
7352 # This can fail as the old devices are degraded and _WaitForSync
7353 # does a combined result over all disks, so we don't check its return value
7354 self.lu.LogStep(cstep, steps_total, "Sync devices")
7355 cstep += 1
7356 _WaitForSync(self.lu, self.instance)
7358 # Check all devices manually
7359 self._CheckDevices(self.instance.primary_node, iv_names)
7361 # Step: remove old storage
7362 if not self.early_release:
7363 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7364 self._RemoveOldStorage(self.target_node, iv_names)
7367 class LURepairNodeStorage(NoHooksLU):
7368 """Repairs the volume group on a node.
7371 _OP_REQP = ["node_name"]
7374 def CheckArguments(self):
7375 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7377 def ExpandNames(self):
7378 self.needed_locks = {
7379 locking.LEVEL_NODE: [self.op.node_name],
7382 def _CheckFaultyDisks(self, instance, node_name):
7383 """Ensure faulty disks abort the opcode or at least warn."""
7385 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7387 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7388 " node '%s'" % (instance.name, node_name),
7390 except errors.OpPrereqError, err:
7391 if self.op.ignore_consistency:
7392 self.proc.LogWarning(str(err.args[0]))
7396 def CheckPrereq(self):
7397 """Check prerequisites.
7400 storage_type = self.op.storage_type
7402 if (constants.SO_FIX_CONSISTENCY not in
7403 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7404 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7405 " repaired" % storage_type,
7408 # Check whether any instance on this node has faulty disks
7409 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7410 if not inst.admin_up:
7411 continue
7412 check_nodes = set(inst.all_nodes)
7413 check_nodes.discard(self.op.node_name)
7414 for inst_node_name in check_nodes:
7415 self._CheckFaultyDisks(inst, inst_node_name)
7417 def Exec(self, feedback_fn):
7418 feedback_fn("Repairing storage unit '%s' on %s ..." %
7419 (self.op.name, self.op.node_name))
7421 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7422 result = self.rpc.call_storage_execute(self.op.node_name,
7423 self.op.storage_type, st_args,
7424 self.op.name,
7425 constants.SO_FIX_CONSISTENCY)
7426 result.Raise("Failed to repair storage unit '%s' on %s" %
7427 (self.op.name, self.op.node_name))
7430 class LUNodeEvacuationStrategy(NoHooksLU):
7431 """Computes the node evacuation strategy.
7434 _OP_REQP = ["nodes"]
7437 def CheckArguments(self):
7438 if not hasattr(self.op, "remote_node"):
7439 self.op.remote_node = None
7440 if not hasattr(self.op, "iallocator"):
7441 self.op.iallocator = None
7442 if self.op.remote_node is not None and self.op.iallocator is not None:
7443 raise errors.OpPrereqError("Give either the iallocator or the new"
7444 " secondary, not both", errors.ECODE_INVAL)
7446 def ExpandNames(self):
7447 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7448 self.needed_locks = locks = {}
7449 if self.op.remote_node is None:
7450 locks[locking.LEVEL_NODE] = locking.ALL_SET
7451 else:
7452 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7453 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7455 def CheckPrereq(self):
7456 pass
7458 def Exec(self, feedback_fn):
7459 if self.op.remote_node is not None:
7460 instances = []
7461 for node in self.op.nodes:
7462 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7463 result = []
7464 for i in instances:
7465 if i.primary_node == self.op.remote_node:
7466 raise errors.OpPrereqError("Node %s is the primary node of"
7467 " instance %s, cannot use it as"
7468 " secondary" %
7469 (self.op.remote_node, i.name),
7470 errors.ECODE_INVAL)
7471 result.append([i.name, self.op.remote_node])
7472 else:
7473 ial = IAllocator(self.cfg, self.rpc,
7474 mode=constants.IALLOCATOR_MODE_MEVAC,
7475 evac_nodes=self.op.nodes)
7476 ial.Run(self.op.iallocator, validate=True)
7477 if not ial.success:
7478 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7479 errors.ECODE_NORES)
7480 result = ial.result
7481 return result
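# The returned result is a list of [instance, new_secondary] pairs, e.g.
# (hypothetical names):
#   [["inst1.example.com", "node3.example.com"],
#    ["inst2.example.com", "node4.example.com"]]
# built directly when a remote node is given, or taken from the
# iallocator's multi-evacuate reply otherwise.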
7484 class LUGrowDisk(LogicalUnit):
7485 """Grow a disk of an instance.
7489 HTYPE = constants.HTYPE_INSTANCE
7490 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7493 def ExpandNames(self):
7494 self._ExpandAndLockInstance()
7495 self.needed_locks[locking.LEVEL_NODE] = []
7496 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7498 def DeclareLocks(self, level):
7499 if level == locking.LEVEL_NODE:
7500 self._LockInstancesNodes()
7502 def BuildHooksEnv(self):
7503 """Build hooks env.
7505 This runs on the master, the primary and all the secondaries.
7507 """
7508 env = {
7509 "DISK": self.op.disk,
7510 "AMOUNT": self.op.amount,
7511 }
7512 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7513 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7514 return env, nl, nl
7516 def CheckPrereq(self):
7517 """Check prerequisites.
7519 This checks that the instance is in the cluster.
7521 """
7522 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7523 assert instance is not None, \
7524 "Cannot retrieve locked instance %s" % self.op.instance_name
7525 nodenames = list(instance.all_nodes)
7526 for node in nodenames:
7527 _CheckNodeOnline(self, node)
7530 self.instance = instance
7532 if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
7533 raise errors.OpPrereqError("Instance's disk layout does not support"
7534 " growing.", errors.ECODE_INVAL)
7536 self.disk = instance.FindDisk(self.op.disk)
7538 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7540 def Exec(self, feedback_fn):
7541 """Execute disk grow.
7544 instance = self.instance
7545 disk = self.disk
7546 for node in instance.all_nodes:
7547 self.cfg.SetDiskID(disk, node)
7548 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7549 result.Raise("Grow request failed to node %s" % node)
7551 # TODO: Rewrite code to work properly
7552 # DRBD goes into sync mode for a short amount of time after executing the
7553 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7554 # calling "resize" in sync mode fails. Sleeping for a short amount of
7555 # time is a work-around.
7556 time.sleep(5)
7558 disk.RecordGrow(self.op.amount)
7559 self.cfg.Update(instance, feedback_fn)
7560 if self.op.wait_for_sync:
7561 disk_abort = not _WaitForSync(self, instance)
7563 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7564 " status.\nPlease check the instance.")
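  # The grow happens in two phases: physically on every node holding the disk
  # (call_blockdev_grow), then logically in the configuration (RecordGrow plus
  # cfg.Update). With wait_for_sync unset, callers must verify the sync status
  # themselves later.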
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False
  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)
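  # For non-static queries the per-device status is a 7-tuple: (dev_path,
  # major, minor, sync_percent, estimated_time, is_degraded, ldisk_status);
  # None is returned for static queries, missing nodes and offline results.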
  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data
  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result
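  # The result maps each instance name to a dict combining the configured
  # state with, for non-static queries, the runtime state as reported by the
  # instance's primary node.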
class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False
  def CheckArguments(self):
    if not hasattr(self.op, 'nics'):
      self.op.nics = []
    if not hasattr(self.op, 'disks'):
      self.op.disks = []
    if not hasattr(self.op, 'beparams'):
      self.op.beparams = {}
    if not hasattr(self.op, 'hvparams'):
      self.op.hvparams = {}
    if not hasattr(self.op, "disk_template"):
      self.op.disk_template = None
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    self.op.force = getattr(self.op, "force", False)
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if self.op.disk_template:
      _CheckDiskTemplate(self.op.disk_template)
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
          self.op.remote_node is None):
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                   " one requires specifying a secondary node",
                                   errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)
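  # Illustrative shapes of the two modification lists validated above (values
  # hypothetical; the opcode itself is constructed elsewhere):
  #   op.disks = [(constants.DDM_ADD, {'size': 1024, 'mode': 'w'})]
  #   op.nics = [(0, {'ip': 'none'}), (constants.DDM_REMOVE, {})]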
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
  @staticmethod
  def _GetUpdatedParams(old_params, update_dict,
                        default_values, parameter_types):
    """Return the new params dict for the given params.

    @type old_params: dict
    @param old_params: old parameters
    @type update_dict: dict
    @param update_dict: dict containing new parameter values,
                        or constants.VALUE_DEFAULT to reset the
                        parameter to its default value
    @type default_values: dict
    @param default_values: default values for the filled parameters
    @type parameter_types: dict
    @param parameter_types: dict mapping target dict keys to types
                            in constants.ENFORCEABLE_TYPES
    @rtype: (dict, dict)
    @return: (new_parameters, filled_parameters)

    """
    params_copy = copy.deepcopy(old_params)
    for key, val in update_dict.iteritems():
      if val == constants.VALUE_DEFAULT:
        try:
          del params_copy[key]
        except KeyError:
          pass
      else:
        params_copy[key] = val
    utils.ForceDictType(params_copy, parameter_types)
    params_filled = objects.FillDict(default_values, params_copy)
    return (params_copy, params_filled)
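  # Illustrative call (parameter names and values hypothetical): resetting
  # "acpi" to the cluster default while overriding "boot_order" returns the
  # sparse instance-level dict plus the fully filled dict used for checks:
  #   new, filled = self._GetUpdatedParams({'acpi': False},
  #                                        {'acpi': constants.VALUE_DEFAULT,
  #                                         'boot_order': 'cd'},
  #                                        cluster.hvparams[hv_name],
  #                                        constants.HVS_PARAMETER_TYPES)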
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        disks = [{"size": d.size} for d in instance.disks]
        required = _ComputeDiskSize(self.op.disk_template, disks)
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
        _CheckInstanceDown(self, instance, "cannot change disk template")
    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
        instance.hvparams, self.op.hvparams,
        cluster.hvparams[instance.hypervisor],
        constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
        instance.beparams, self.op.beparams,
        cluster.beparams[constants.PP_DEFAULT],
        constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []
    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %s" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
        self._GetUpdatedParams(old_nic_params, update_params_dict,
                               cluster.nicparams[constants.PP_DEFAULT],
                               constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %s" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    return
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance

    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))
    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))
    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
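  # The conversion helpers are stored as plain functions in this class-level
  # dict, which is why the dispatch in Exec passes the LU instance explicitly:
  #   self._DISK_CONVERSIONS[mode](self, feedback_fn)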
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False
  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False
  def CheckArguments(self):
    """Check the arguments.

    """
    _CheckBooleanOpField(self.op, "remove_instance")
    _CheckBooleanOpField(self.op, "ignore_remove_failures")

    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
    self.remove_instance = getattr(self.op, "remove_instance", False)
    self.ignore_remove_failures = getattr(self.op, "ignore_remove_failures",
                                          False)

    if self.remove_instance and not self.op.shutdown:
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before")
  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.
  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.remove_instance)),
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
    assert self.dst_node is not None

    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      # per-disk results
      dresults = []
      try:
        for idx, disk in enumerate(instance.disks):
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
                      (idx, src_node))

          # result.payload will be a snapshot of an lvm leaf of the one we
          # passed
          result = self.rpc.call_blockdev_snapshot(src_node, disk)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                            idx, src_node, msg)
            snap_disks.append(False)
          else:
            disk_id = (vgname, result.payload)
            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                   logical_id=disk_id, physical_id=disk_id,
                                   iv_name=disk.iv_name)
            snap_disks.append(new_dev)

      finally:
        if self.op.shutdown and instance.admin_up and not self.remove_instance:
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

      # TODO: check for size

      cluster_name = self.cfg.GetClusterName()
      for idx, dev in enumerate(snap_disks):
        feedback_fn("Exporting snapshot %s from %s to %s" %
                    (idx, src_node, dst_node.name))
        if dev:
          # FIXME: pass debug from opcode to backend
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                 instance, cluster_name,
                                                 idx, self.op.debug_level)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not export disk/%s from node %s to"
                            " node %s: %s", idx, src_node, dst_node.name, msg)
            dresults.append(False)
          else:
            dresults.append(True)
          msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
          if msg:
            self.LogWarning("Could not remove snapshot for disk/%d from node"
                            " %s: %s", idx, src_node, msg)
        else:
          dresults.append(False)

      feedback_fn("Finalizing export on %s" % dst_node.name)
      result = self.rpc.call_finalize_export(dst_node.name, instance,
                                             snap_disks)
      fin_resu = True
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not finalize export for instance %s"
                        " on node %s: %s", instance.name, dst_node.name, msg)
        fin_resu = False

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    # Remove instance if requested
    if self.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance, self.ignore_remove_failures)

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

    return fin_resu, dresults
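  # The LU returns (fin_resu, dresults): whether finalizing the export on the
  # target node succeeded, plus one boolean per instance disk telling if that
  # disk's snapshot was exported successfully.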
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False
  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]
  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)
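  # Illustrative instantiation (the mode decides which keyword set is
  # mandatory, per _ALLO_KEYS/_RELO_KEYS/_EVAC_KEYS above; names hypothetical):
  #   IAllocator(self.cfg, self.rpc, mode=constants.IALLOCATOR_MODE_RELOC,
  #              name=instance_name, relocate_from=[old_secondary])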
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
          cluster_info.nicparams[constants.PP_DEFAULT],
          nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                    }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
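  # The serialized input then looks roughly like this (multi-evacuate case;
  # the "type" value is the corresponding IALLOCATOR_MODE_* constant):
  #   {"cluster_name": ..., "nodes": {...}, "instances": {...},
  #    "request": {"type": ..., "evac_nodes": [...]}}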
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " not a list")
    self.out_data = rdict
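  # Expected allocator script output, as enforced above (illustrative):
  #   {"success": true, "info": "...", "result": [...]}
  # with a top-level "nodes" key still accepted as a legacy alias of "result".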
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result